libpostproc/postprocess.c

   1 /*
   2  * Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
   3  *
   4  * AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
   5  *
   6  * This file is part of FFmpeg.
   7  *
   8  * FFmpeg is free software; you can redistribute it and/or modify
   9  * it under the terms of the GNU General Public License as published by
  10  * the Free Software Foundation; either version 2 of the License, or
  11  * (at your option) any later version.
  12  *
  13  * FFmpeg is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16  * GNU General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU General Public License
  19  * along with FFmpeg; if not, write to the Free Software
  20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  21  */
  22
  23 /**
  24  * @file libpostproc/postprocess.c
  25  * postprocessing.
  26  */
  27
  28 /*
  29                         C       MMX     MMX2    3DNow   AltiVec
  30 isVertDC                Ec      Ec                      Ec
  31 isVertMinMaxOk          Ec      Ec                      Ec
  32 doVertLowPass           E               e       e       Ec
  33 doVertDefFilter         Ec      Ec      e       e       Ec
  34 isHorizDC               Ec      Ec                      Ec
  35 isHorizMinMaxOk         a       E                       Ec
  36 doHorizLowPass          E               e       e       Ec
  37 doHorizDefFilter        Ec      Ec      e       e       Ec
  38 do_a_deblock            Ec      E       Ec      E
  39 deRing                  E               e       e*      Ecp
  40 Vertical RKAlgo1        E               a       a
  41 Horizontal RKAlgo1                      a       a
  42 Vertical X1#            a               E       E
  43 Horizontal X1#          a               E       E
  44 LinIpolDeinterlace      e               E       E*
  45 CubicIpolDeinterlace    a               e       e*
  46 LinBlendDeinterlace     e               E       E*
  47 MedianDeinterlace#      E       Ec      Ec
  48 TempDeNoiser#           E               e       e       Ec
  49
  50 * I do not have a 3DNow! CPU -> it is untested, but no one said it does not work so it seems to work
  51 # more or less selfinvented filters so the exactness is not too meaningful
  52 E = Exact implementation
  53 e = almost exact implementation (slightly different rounding,...)
  54 a = alternative / approximate impl
  55 c = checked against the other implementations (-vo md5)
  56 p = partially optimized, still some work to do
  57 */
  58
  59 /*
  60 TODO:
  61 reduce the time wasted on the mem transfer
  62 unroll stuff if instructions depend too much on the prior one
  63 move YScale thing to the end instead of fixing QP
  64 write a faster and higher quality deblocking filter :)
make the mainloop more flexible (variable number of blocks at once;
        the if/else stuff per block is slowing things down)
  67 compare the quality & speed of all filters
  68 split this huge file
  69 optimize c versions
  70 try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
  71 ...
  72 */
  73
  74 //Changelog: use the Subversion log
  75
  76 #include "config.h"
  77 #include "libavutil/avutil.h"
  78 #include <inttypes.h>
  79 #include <stdio.h>
  80 #include <stdlib.h>
  81 #include <string.h>
  82 //#undef HAVE_MMX2
  83 //#define HAVE_AMD3DNOW
  84 //#undef HAVE_MMX
  85 //#undef ARCH_X86
  86 //#define DEBUG_BRIGHTNESS
  87 #include "postprocess.h"
  88 #include "postprocess_internal.h"
  89
  90 unsigned postproc_version(void)
  91 {
  92     return LIBPOSTPROC_VERSION_INT;
  93 }
  94
  95 #if HAVE_ALTIVEC_H
  96 #include <altivec.h>
  97 #endif
  98
// Size in bytes of the local buffer pp_get_mode_by_name_and_quality() copies the mode string into.
#define GET_MODE_BUFFER_SIZE 500
// Number of slots in the options[] array used while parsing the options of one filter.
#define OPTIONS_ARRAY_SIZE 10
// Number of rows / columns the C filter loops in this file iterate over per block.
#define BLOCK_SIZE 8
// NOTE(review): not referenced in the visible part of this file; presumably used by the included templates.
#define TEMP_STRIDE 8
//#define NUM_BLOCKS_AT_ONCE 16 //not used yet
 104
#if ARCH_X86
// 64-bit constants, only declared on x86.
// wXX repeats the 16-bit value 0x00XX four times, bXX repeats the byte 0xXX eight times.
// NOTE(review): presumably referenced from inline assembly in postprocess_template.c; no user is visible here.
DECLARE_ASM_CONST(8, uint64_t, w05)= 0x0005000500050005LL;
DECLARE_ASM_CONST(8, uint64_t, w04)= 0x0004000400040004LL;
DECLARE_ASM_CONST(8, uint64_t, w20)= 0x0020002000200020LL;
DECLARE_ASM_CONST(8, uint64_t, b00)= 0x0000000000000000LL;
DECLARE_ASM_CONST(8, uint64_t, b01)= 0x0101010101010101LL;
DECLARE_ASM_CONST(8, uint64_t, b02)= 0x0202020202020202LL;
DECLARE_ASM_CONST(8, uint64_t, b08)= 0x0808080808080808LL;
DECLARE_ASM_CONST(8, uint64_t, b80)= 0x8080808080808080LL;
#endif

// Declared for every architecture (outside the ARCH_X86 guard).
// NOTE(review): presumably the threshold of the deringing filter in the templates; confirm there.
DECLARE_ASM_CONST(8, int, deringThreshold)= 20;
 117
 118
 119 static struct PPFilter filters[]=
 120 {
 121     {"hb", "hdeblock",              1, 1, 3, H_DEBLOCK},
 122     {"vb", "vdeblock",              1, 2, 4, V_DEBLOCK},
 123 /*  {"hr", "rkhdeblock",            1, 1, 3, H_RK1_FILTER},
 124     {"vr", "rkvdeblock",            1, 2, 4, V_RK1_FILTER},*/
 125     {"h1", "x1hdeblock",            1, 1, 3, H_X1_FILTER},
 126     {"v1", "x1vdeblock",            1, 2, 4, V_X1_FILTER},
 127     {"ha", "ahdeblock",             1, 1, 3, H_A_DEBLOCK},
 128     {"va", "avdeblock",             1, 2, 4, V_A_DEBLOCK},
 129     {"dr", "dering",                1, 5, 6, DERING},
 130     {"al", "autolevels",            0, 1, 2, LEVEL_FIX},
 131     {"lb", "linblenddeint",         1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
 132     {"li", "linipoldeint",          1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
 133     {"ci", "cubicipoldeint",        1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
 134     {"md", "mediandeint",           1, 1, 4, MEDIAN_DEINT_FILTER},
 135     {"fd", "ffmpegdeint",           1, 1, 4, FFMPEG_DEINT_FILTER},
 136     {"l5", "lowpass5",              1, 1, 4, LOWPASS5_DEINT_FILTER},
 137     {"tn", "tmpnoise",              1, 7, 8, TEMP_NOISE_FILTER},
 138     {"fq", "forcequant",            1, 0, 0, FORCE_QUANT},
 139     {NULL, NULL,0,0,0,0} //End Marker
 140 };
 141
 142 static const char *replaceTable[]=
 143 {
 144     "default",      "hb:a,vb:a,dr:a",
 145     "de",           "hb:a,vb:a,dr:a",
 146     "fast",         "h1:a,v1:a,dr:a",
 147     "fa",           "h1:a,v1:a,dr:a",
 148     "ac",           "ha:a:128:7,va:a,dr:a",
 149     NULL //End Marker
 150 };
 151
 152
 153 #if ARCH_X86
 154 static inline void prefetchnta(void *p)
 155 {
 156     __asm__ volatile(   "prefetchnta (%0)\n\t"
 157         : : "r" (p)
 158     );
 159 }
 160
 161 static inline void prefetcht0(void *p)
 162 {
 163     __asm__ volatile(   "prefetcht0 (%0)\n\t"
 164         : : "r" (p)
 165     );
 166 }
 167
 168 static inline void prefetcht1(void *p)
 169 {
 170     __asm__ volatile(   "prefetcht1 (%0)\n\t"
 171         : : "r" (p)
 172     );
 173 }
 174
 175 static inline void prefetcht2(void *p)
 176 {
 177     __asm__ volatile(   "prefetcht2 (%0)\n\t"
 178         : : "r" (p)
 179     );
 180 }
 181 #endif
 182
 183 /* The horizontal functions exist only in C because the MMX
 184  * code is faster with vertical filters and transposing. */
 185
 186 /**
 187  * Check if the given 8x8 Block is mostly "flat"
 188  */
 189 static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
 190 {
 191     int numEq= 0;
 192     int y;
 193     const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
 194     const int dcThreshold= dcOffset*2 + 1;
 195
 196     for(y=0; y<BLOCK_SIZE; y++){
 197         if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
 198         if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
 199         if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
 200         if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
 201         if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
 202         if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
 203         if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
 204         src+= stride;
 205     }
 206     return numEq > c->ppMode.flatnessThreshold;
 207 }
 208
 209 /**
 210  * Check if the middle 8x8 Block in the given 8x16 block is flat
 211  */
 212 static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c)
 213 {
 214     int numEq= 0;
 215     int y;
 216     const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
 217     const int dcThreshold= dcOffset*2 + 1;
 218
 219     src+= stride*4; // src points to begin of the 8x8 Block
 220     for(y=0; y<BLOCK_SIZE-1; y++){
 221         if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
 222         if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
 223         if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
 224         if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
 225         if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
 226         if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
 227         if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
 228         if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
 229         src+= stride;
 230     }
 231     return numEq > c->ppMode.flatnessThreshold;
 232 }
 233
 234 static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
 235 {
 236     int i;
 237 #if 1
 238     for(i=0; i<2; i++){
 239         if((unsigned)(src[0] - src[5] + 2*QP) > 4*QP) return 0;
 240         src += stride;
 241         if((unsigned)(src[2] - src[7] + 2*QP) > 4*QP) return 0;
 242         src += stride;
 243         if((unsigned)(src[4] - src[1] + 2*QP) > 4*QP) return 0;
 244         src += stride;
 245         if((unsigned)(src[6] - src[3] + 2*QP) > 4*QP) return 0;
 246         src += stride;
 247     }
 248 #else
 249     for(i=0; i<8; i++){
 250         if((unsigned)(src[0] - src[7] + 2*QP) > 4*QP) return 0;
 251         src += stride;
 252     }
 253 #endif
 254     return 1;
 255 }
 256
 257 static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
 258 {
 259 #if 1
 260 #if 1
 261     int x;
 262     src+= stride*4;
 263     for(x=0; x<BLOCK_SIZE; x+=4){
 264         if((unsigned)(src[  x + 0*stride] - src[  x + 5*stride] + 2*QP) > 4*QP) return 0;
 265         if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
 266         if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
 267         if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
 268     }
 269 #else
 270     int x;
 271     src+= stride*3;
 272     for(x=0; x<BLOCK_SIZE; x++){
 273         if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0;
 274     }
 275 #endif
 276     return 1;
 277 #else
 278     int x;
 279     src+= stride*4;
 280     for(x=0; x<BLOCK_SIZE; x++){
 281         int min=255;
 282         int max=0;
 283         int y;
 284         for(y=0; y<8; y++){
 285             int v= src[x + y*stride];
 286             if(v>max) max=v;
 287             if(v<min) min=v;
 288         }
 289         if(max-min > 2*QP) return 0;
 290     }
 291     return 1;
 292 #endif
 293 }
 294
 295 static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c)
 296 {
 297     if( isHorizDC_C(src, stride, c) ){
 298         if( isHorizMinMaxOk_C(src, stride, c->QP) )
 299             return 1;
 300         else
 301             return 0;
 302     }else{
 303         return 2;
 304     }
 305 }
 306
 307 static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c)
 308 {
 309     if( isVertDC_C(src, stride, c) ){
 310         if( isVertMinMaxOk_C(src, stride, c->QP) )
 311             return 1;
 312         else
 313             return 0;
 314     }else{
 315         return 2;
 316     }
 317 }
 318
 319 static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
 320 {
 321     int y;
 322     for(y=0; y<BLOCK_SIZE; y++){
 323         const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
 324
 325         if(FFABS(middleEnergy) < 8*c->QP){
 326             const int q=(dst[3] - dst[4])/2;
 327             const int leftEnergy=  5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
 328             const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
 329
 330             int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
 331             d= FFMAX(d, 0);
 332
 333             d= (5*d + 32) >> 6;
 334             d*= FFSIGN(-middleEnergy);
 335
 336             if(q>0)
 337             {
 338                 d= d<0 ? 0 : d;
 339                 d= d>q ? q : d;
 340             }
 341             else
 342             {
 343                 d= d>0 ? 0 : d;
 344                 d= d<q ? q : d;
 345             }
 346
 347             dst[3]-= d;
 348             dst[4]+= d;
 349         }
 350         dst+= stride;
 351     }
 352 }
 353
 354 /**
 355  * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
 356  * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
 357  */
 358 static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
 359 {
 360     int y;
 361     for(y=0; y<BLOCK_SIZE; y++){
 362         const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
 363         const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
 364
 365         int sums[10];
 366         sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
 367         sums[1] = sums[0] - first  + dst[3];
 368         sums[2] = sums[1] - first  + dst[4];
 369         sums[3] = sums[2] - first  + dst[5];
 370         sums[4] = sums[3] - first  + dst[6];
 371         sums[5] = sums[4] - dst[0] + dst[7];
 372         sums[6] = sums[5] - dst[1] + last;
 373         sums[7] = sums[6] - dst[2] + last;
 374         sums[8] = sums[7] - dst[3] + last;
 375         sums[9] = sums[8] - dst[4] + last;
 376
 377         dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
 378         dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
 379         dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
 380         dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
 381         dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
 382         dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
 383         dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
 384         dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
 385
 386         dst+= stride;
 387     }
 388 }
 389
 390 /**
 391  * Experimental Filter 1 (Horizontal)
 392  * will not damage linear gradients
 393  * Flat blocks should look like they were passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
 394  * can only smooth blocks at the expected locations (it cannot smooth them if they did move)
 395  * MMX2 version does correct clipping C version does not
 396  * not identical with the vertical one
 397  */
 398 static inline void horizX1Filter(uint8_t *src, int stride, int QP)
 399 {
 400     int y;
 401     static uint64_t *lut= NULL;
 402     if(lut==NULL)
 403     {
 404         int i;
 405         lut = av_malloc(256*8);
 406         for(i=0; i<256; i++)
 407         {
 408             int v= i < 128 ? 2*i : 2*(i-256);
 409 /*
 410 //Simulate 112242211 9-Tap filter
 411             uint64_t a= (v/16)  & 0xFF;
 412             uint64_t b= (v/8)   & 0xFF;
 413             uint64_t c= (v/4)   & 0xFF;
 414             uint64_t d= (3*v/8) & 0xFF;
 415 */
 416 //Simulate piecewise linear interpolation
 417             uint64_t a= (v/16)   & 0xFF;
 418             uint64_t b= (v*3/16) & 0xFF;
 419             uint64_t c= (v*5/16) & 0xFF;
 420             uint64_t d= (7*v/16) & 0xFF;
 421             uint64_t A= (0x100 - a)&0xFF;
 422             uint64_t B= (0x100 - b)&0xFF;
 423             uint64_t C= (0x100 - c)&0xFF;
 424             uint64_t D= (0x100 - c)&0xFF;
 425
 426             lut[i]   = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
 427                        (D<<24) | (C<<16) | (B<<8)  | (A);
 428             //lut[i] = (v<<32) | (v<<24);
 429         }
 430     }
 431
 432     for(y=0; y<BLOCK_SIZE; y++){
 433         int a= src[1] - src[2];
 434         int b= src[3] - src[4];
 435         int c= src[5] - src[6];
 436
 437         int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
 438
 439         if(d < QP){
 440             int v = d * FFSIGN(-b);
 441
 442             src[1] +=v/8;
 443             src[2] +=v/4;
 444             src[3] +=3*v/8;
 445             src[4] -=3*v/8;
 446             src[5] -=v/4;
 447             src[6] -=v/8;
 448         }
 449         src+=stride;
 450     }
 451 }
 452
 453 /**
 454  * accurate deblock filter
 455  */
 456 static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){
 457     int y;
 458     const int QP= c->QP;
 459     const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
 460     const int dcThreshold= dcOffset*2 + 1;
 461 //START_TIMER
 462     src+= step*4; // src points to begin of the 8x8 Block
 463     for(y=0; y<8; y++){
 464         int numEq= 0;
 465
 466         if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
 467         if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
 468         if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
 469         if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
 470         if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
 471         if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
 472         if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
 473         if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
 474         if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
 475         if(numEq > c->ppMode.flatnessThreshold){
 476             int min, max, x;
 477
 478             if(src[0] > src[step]){
 479                 max= src[0];
 480                 min= src[step];
 481             }else{
 482                 max= src[step];
 483                 min= src[0];
 484             }
 485             for(x=2; x<8; x+=2){
 486                 if(src[x*step] > src[(x+1)*step]){
 487                         if(src[x    *step] > max) max= src[ x   *step];
 488                         if(src[(x+1)*step] < min) min= src[(x+1)*step];
 489                 }else{
 490                         if(src[(x+1)*step] > max) max= src[(x+1)*step];
 491                         if(src[ x   *step] < min) min= src[ x   *step];
 492                 }
 493             }
 494             if(max-min < 2*QP){
 495                 const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
 496                 const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
 497
 498                 int sums[10];
 499                 sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
 500                 sums[1] = sums[0] - first       + src[3*step];
 501                 sums[2] = sums[1] - first       + src[4*step];
 502                 sums[3] = sums[2] - first       + src[5*step];
 503                 sums[4] = sums[3] - first       + src[6*step];
 504                 sums[5] = sums[4] - src[0*step] + src[7*step];
 505                 sums[6] = sums[5] - src[1*step] + last;
 506                 sums[7] = sums[6] - src[2*step] + last;
 507                 sums[8] = sums[7] - src[3*step] + last;
 508                 sums[9] = sums[8] - src[4*step] + last;
 509
 510                 src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
 511                 src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
 512                 src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
 513                 src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
 514                 src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
 515                 src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
 516                 src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
 517                 src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
 518             }
 519         }else{
 520             const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
 521
 522             if(FFABS(middleEnergy) < 8*QP){
 523                 const int q=(src[3*step] - src[4*step])/2;
 524                 const int leftEnergy=  5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
 525                 const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
 526
 527                 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
 528                 d= FFMAX(d, 0);
 529
 530                 d= (5*d + 32) >> 6;
 531                 d*= FFSIGN(-middleEnergy);
 532
 533                 if(q>0){
 534                     d= d<0 ? 0 : d;
 535                     d= d>q ? q : d;
 536                 }else{
 537                     d= d>0 ? 0 : d;
 538                     d= d<q ? q : d;
 539                 }
 540
 541                 src[3*step]-= d;
 542                 src[4*step]+= d;
 543             }
 544         }
 545
 546         src += stride;
 547     }
 548 /*if(step==16){
 549     STOP_TIMER("step16")
 550 }else{
 551     STOP_TIMER("stepX")
 552 }*/
 553 }
 554
// Select which variants of the template code get compiled, then include
// postprocess_template.c once per selected variant.
// Note: there are C, MMX, MMX2, 3DNow and AltiVec variants; there is no combined 3DNow+MMX2 one.
// With RUNTIME_CPUDETECT defined, the C variant and (on x86) all three x86 variants are compiled.
// Plain C versions
#if !(HAVE_MMX || HAVE_ALTIVEC) || defined (RUNTIME_CPUDETECT)
#define COMPILE_C
#endif

#if HAVE_ALTIVEC
#define COMPILE_ALTIVEC
#endif //HAVE_ALTIVEC

#if ARCH_X86

#if (HAVE_MMX && !HAVE_AMD3DNOW && !HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
#define COMPILE_MMX
#endif

#if HAVE_MMX2 || defined (RUNTIME_CPUDETECT)
#define COMPILE_MMX2
#endif

#if (HAVE_AMD3DNOW && !HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
#define COMPILE_3DNOW
#endif
#endif /* ARCH_X86 */

// From here on the HAVE_* macros no longer describe the build configuration:
// they are reset to 0 and then set per variant before each template include.
#undef HAVE_MMX
#define HAVE_MMX 0
#undef HAVE_MMX2
#define HAVE_MMX2 0
#undef HAVE_AMD3DNOW
#define HAVE_AMD3DNOW 0
#undef HAVE_ALTIVEC
#define HAVE_ALTIVEC 0

// RENAME() appends the variant suffix to an identifier.
// NOTE(review): the template presumably wraps its function names in RENAME(),
// which would yield postProcess_C, postProcess_MMX, ... as called by postProcess() below.
#ifdef COMPILE_C
#define RENAME(a) a ## _C
#include "postprocess_template.c"
#endif

#ifdef COMPILE_ALTIVEC
#undef RENAME
#undef HAVE_ALTIVEC
#define HAVE_ALTIVEC 1
#define RENAME(a) a ## _altivec
#include "postprocess_altivec_template.c"
#include "postprocess_template.c"
#endif

//MMX versions
#ifdef COMPILE_MMX
#undef RENAME
#undef HAVE_MMX
#define HAVE_MMX 1
#define RENAME(a) a ## _MMX
#include "postprocess_template.c"
#endif

//MMX2 versions
#ifdef COMPILE_MMX2
#undef RENAME
#undef HAVE_MMX
#undef HAVE_MMX2
#define HAVE_MMX 1
#define HAVE_MMX2 1
#define RENAME(a) a ## _MMX2
#include "postprocess_template.c"
#endif

//3DNOW versions
#ifdef COMPILE_3DNOW
#undef RENAME
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_AMD3DNOW
#define HAVE_MMX 1
#define HAVE_MMX2 0
#define HAVE_AMD3DNOW 1
#define RENAME(a) a ## _3DNow
#include "postprocess_template.c"
#endif

// Note: after this point the HAVE_xyz macros hold whatever the last compiled
// variant set them to, so do not use them as configuration tests in new code.
// postProcess() below still tests them in its non-RUNTIME_CPUDETECT branch.
 637
 638 static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
 639         const QP_STORE_T QPs[], int QPStride, int isColor, pp_mode *vm, pp_context *vc)
 640 {
 641     PPContext *c= (PPContext *)vc;
 642     PPMode *ppMode= (PPMode *)vm;
 643     c->ppMode= *ppMode; //FIXME
 644
 645     // Using ifs here as they are faster than function pointers although the
 646     // difference would not be measurable here but it is much better because
 647     // someone might exchange the CPU whithout restarting MPlayer ;)
 648 #ifdef RUNTIME_CPUDETECT
 649 #if ARCH_X86
 650     // ordered per speed fastest first
 651     if(c->cpuCaps & PP_CPU_CAPS_MMX2)
 652         postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
 653     else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
 654         postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
 655     else if(c->cpuCaps & PP_CPU_CAPS_MMX)
 656         postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
 657     else
 658         postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
 659 #else
 660 #if HAVE_ALTIVEC
 661     if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
 662             postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
 663     else
 664 #endif
 665             postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
 666 #endif
 667 #else //RUNTIME_CPUDETECT
 668 #if   HAVE_MMX2
 669             postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
 670 #elif HAVE_AMD3DNOW
 671             postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
 672 #elif HAVE_MMX
 673             postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
 674 #elif HAVE_ALTIVEC
 675             postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
 676 #else
 677             postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
 678 #endif
 679 #endif //!RUNTIME_CPUDETECT
 680 }
 681
 682 //static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
 683 //        QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
 684
 685 /* -pp Command line Help
 686 */
 687 #if LIBPOSTPROC_VERSION_INT < (52<<16)
 688 const char *const pp_help=
 689 #else
 690 const char pp_help[] =
 691 #endif
 692 "Available postprocessing filters:\n"
 693 "Filters                        Options\n"
 694 "short  long name       short   long option     Description\n"
 695 "*      *               a       autoq           CPU power dependent enabler\n"
 696 "                       c       chrom           chrominance filtering enabled\n"
 697 "                       y       nochrom         chrominance filtering disabled\n"
 698 "                       n       noluma          luma filtering disabled\n"
 699 "hb     hdeblock        (2 threshold)           horizontal deblocking filter\n"
 700 "       1. difference factor: default=32, higher -> more deblocking\n"
 701 "       2. flatness threshold: default=39, lower -> more deblocking\n"
 702 "                       the h & v deblocking filters share these\n"
 703 "                       so you can't set different thresholds for h / v\n"
 704 "vb     vdeblock        (2 threshold)           vertical deblocking filter\n"
 705 "ha     hadeblock       (2 threshold)           horizontal deblocking filter\n"
 706 "va     vadeblock       (2 threshold)           vertical deblocking filter\n"
 707 "h1     x1hdeblock                              experimental h deblock filter 1\n"
 708 "v1     x1vdeblock                              experimental v deblock filter 1\n"
 709 "dr     dering                                  deringing filter\n"
 710 "al     autolevels                              automatic brightness / contrast\n"
 711 "                       f        fullyrange     stretch luminance to (0..255)\n"
 712 "lb     linblenddeint                           linear blend deinterlacer\n"
 713 "li     linipoldeint                            linear interpolating deinterlace\n"
 714 "ci     cubicipoldeint                          cubic interpolating deinterlacer\n"
 715 "md     mediandeint                             median deinterlacer\n"
 716 "fd     ffmpegdeint                             ffmpeg deinterlacer\n"
 717 "l5     lowpass5                                FIR lowpass deinterlacer\n"
 718 "de     default                                 hb:a,vb:a,dr:a\n"
 719 "fa     fast                                    h1:a,v1:a,dr:a\n"
 720 "ac                                             ha:a:128:7,va:a,dr:a\n"
 721 "tn     tmpnoise        (3 threshold)           temporal noise reducer\n"
 722 "                     1. <= 2. <= 3.            larger -> stronger filtering\n"
 723 "fq     forceQuant      <quantizer>             force quantizer\n"
 724 "Usage:\n"
 725 "<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
 726 "long form example:\n"
 727 "vdeblock:autoq/hdeblock:autoq/linblenddeint    default,-vdeblock\n"
 728 "short form example:\n"
 729 "vb:a/hb:a/lb                                   de,-vb\n"
 730 "more examples:\n"
 731 "tn:64:128:256\n"
 732 "\n"
 733 ;
 734
 735 pp_mode *pp_get_mode_by_name_and_quality(const char *name, int quality)
 736 {
 737     char temp[GET_MODE_BUFFER_SIZE];
 738     char *p= temp;
 739     static const char filterDelimiters[] = ",/";
 740     static const char optionDelimiters[] = ":";
 741     struct PPMode *ppMode;
 742     char *filterToken;
 743
 744     ppMode= av_malloc(sizeof(PPMode));
 745
 746     ppMode->lumMode= 0;
 747     ppMode->chromMode= 0;
 748     ppMode->maxTmpNoise[0]= 700;
 749     ppMode->maxTmpNoise[1]= 1500;
 750     ppMode->maxTmpNoise[2]= 3000;
 751     ppMode->maxAllowedY= 234;
 752     ppMode->minAllowedY= 16;
 753     ppMode->baseDcDiff= 256/8;
 754     ppMode->flatnessThreshold= 56-16-1;
 755     ppMode->maxClippedThreshold= 0.01;
 756     ppMode->error=0;
 757
 758     strncpy(temp, name, GET_MODE_BUFFER_SIZE);
 759
 760     av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
 761
 762     for(;;){
 763         char *filterName;
 764         int q= 1000000; //PP_QUALITY_MAX;
 765         int chrom=-1;
 766         int luma=-1;
 767         char *option;
 768         char *options[OPTIONS_ARRAY_SIZE];
 769         int i;
 770         int filterNameOk=0;
 771         int numOfUnknownOptions=0;
 772         int enable=1; //does the user want us to enabled or disabled the filter
 773
 774         filterToken= strtok(p, filterDelimiters);
 775         if(filterToken == NULL) break;
 776         p+= strlen(filterToken) + 1; // p points to next filterToken
 777         filterName= strtok(filterToken, optionDelimiters);
 778         av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
 779
 780         if(*filterName == '-'){
 781             enable=0;
 782             filterName++;
 783         }
 784
 785         for(;;){ //for all options
 786             option= strtok(NULL, optionDelimiters);
 787             if(option == NULL) break;
 788
 789             av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
 790             if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
 791             else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
 792             else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
 793             else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
 794             else{
 795                 options[numOfUnknownOptions] = option;
 796                 numOfUnknownOptions++;
 797             }
 798             if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
 799         }
 800         options[numOfUnknownOptions] = NULL;
 801
 802         /* replace stuff from the replace Table */
 803         for(i=0; replaceTable[2*i]!=NULL; i++){
 804             if(!strcmp(replaceTable[2*i], filterName)){
 805                 int newlen= strlen(replaceTable[2*i + 1]);
 806                 int plen;
 807                 int spaceLeft;
 808
 809                 if(p==NULL) p= temp, *p=0;      //last filter
 810                 else p--, *p=',';               //not last filter
 811
 812                 plen= strlen(p);
 813                 spaceLeft= p - temp + plen;
 814                 if(spaceLeft + newlen  >= GET_MODE_BUFFER_SIZE){
 815                     ppMode->error++;
 816                     break;
 817                 }
 818                 memmove(p + newlen, p, plen+1);
 819                 memcpy(p, replaceTable[2*i + 1], newlen);
 820                 filterNameOk=1;
 821             }
 822         }
 823
 824         for(i=0; filters[i].shortName!=NULL; i++){
 825             if(   !strcmp(filters[i].longName, filterName)
 826                || !strcmp(filters[i].shortName, filterName)){
 827                 ppMode->lumMode &= ~filters[i].mask;
 828                 ppMode->chromMode &= ~filters[i].mask;
 829
 830                 filterNameOk=1;
 831                 if(!enable) break; // user wants to disable it
 832
 833                 if(q >= filters[i].minLumQuality && luma)
 834                     ppMode->lumMode|= filters[i].mask;
 835                 if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
 836                     if(q >= filters[i].minChromQuality)
 837                             ppMode->chromMode|= filters[i].mask;
 838
 839                 if(filters[i].mask == LEVEL_FIX){
 840                     int o;
 841                     ppMode->minAllowedY= 16;
 842                     ppMode->maxAllowedY= 234;
 843                     for(o=0; options[o]!=NULL; o++){
 844                         if(  !strcmp(options[o],"fullyrange")
 845                            ||!strcmp(options[o],"f")){
 846                             ppMode->minAllowedY= 0;
 847                             ppMode->maxAllowedY= 255;
 848                             numOfUnknownOptions--;
 849                         }
 850                     }
 851                 }
 852                 else if(filters[i].mask == TEMP_NOISE_FILTER)
 853                 {
 854                     int o;
 855                     int numOfNoises=0;
 856
 857                     for(o=0; options[o]!=NULL; o++){
 858                         char *tail;
 859                         ppMode->maxTmpNoise[numOfNoises]=
 860                             strtol(options[o], &tail, 0);
 861                         if(tail!=options[o]){
 862                             numOfNoises++;
 863                             numOfUnknownOptions--;
 864                             if(numOfNoises >= 3) break;
 865                         }
 866                     }
 867                 }
 868                 else if(filters[i].mask == V_DEBLOCK   || filters[i].mask == H_DEBLOCK
 869                      || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK){
 870                     int o;
 871
 872                     for(o=0; options[o]!=NULL && o<2; o++){
 873                         char *tail;
 874                         int val= strtol(options[o], &tail, 0);
 875                         if(tail==options[o]) break;
 876
 877                         numOfUnknownOptions--;
 878                         if(o==0) ppMode->baseDcDiff= val;
 879                         else ppMode->flatnessThreshold= val;
 880                     }
 881                 }
 882                 else if(filters[i].mask == FORCE_QUANT){
 883                     int o;
 884                     ppMode->forcedQuant= 15;
 885
 886                     for(o=0; options[o]!=NULL && o<1; o++){
 887                         char *tail;
 888                         int val= strtol(options[o], &tail, 0);
 889                         if(tail==options[o]) break;
 890
 891                         numOfUnknownOptions--;
 892                         ppMode->forcedQuant= val;
 893                     }
 894                 }
 895             }
 896         }
 897         if(!filterNameOk) ppMode->error++;
 898         ppMode->error += numOfUnknownOptions;
 899     }
 900
 901     av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
 902     if(ppMode->error){
 903         av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
 904         av_free(ppMode);
 905         return NULL;
 906     }
 907     return ppMode;
 908 }
 909
 910 void pp_free_mode(pp_mode *mode){
 911     av_free(mode);
 912 }
 913
 914 static void reallocAlign(void **p, int alignment, int size){
 915     av_free(*p);
 916     *p= av_mallocz(size);
 917 }
 918
 919 static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
 920     int mbWidth = (width+15)>>4;
 921     int mbHeight= (height+15)>>4;
 922     int i;
 923
 924     c->stride= stride;
 925     c->qpStride= qpStride;
 926
 927     reallocAlign((void **)&c->tempDst, 8, stride*24);
 928     reallocAlign((void **)&c->tempSrc, 8, stride*24);
 929     reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
 930     reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
 931     for(i=0; i<256; i++)
 932             c->yHistogram[i]= width*height/64*15/256;
 933
 934     for(i=0; i<3; i++){
 935         //Note: The +17*1024 is just there so i do not have to worry about r/w over the end.
 936         reallocAlign((void **)&c->tempBlurred[i], 8, stride*mbHeight*16 + 17*1024);
 937         reallocAlign((void **)&c->tempBlurredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
 938     }
 939
 940     reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
 941     reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
 942     reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
 943     reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
 944 }
 945
 946 static const char * context_to_name(void * ptr) {
 947     return "postproc";
 948 }
 949
/* AVClass instance that pp_get_context() installs as av_class on every
 * PPContext it creates.
 * NOTE(review): the three initializers are presumably the class name, the
 * item-name callback and a NULL option table -- confirm against the AVClass
 * declaration. */
static const AVClass av_codec_context_class = { "Postproc", context_to_name, NULL };
 951
 952 pp_context *pp_get_context(int width, int height, int cpuCaps){
 953     PPContext *c= av_malloc(sizeof(PPContext));
 954     int stride= (width+15)&(~15);    //assumed / will realloc if needed
 955     int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
 956
 957     memset(c, 0, sizeof(PPContext));
 958     c->av_class = &av_codec_context_class;
 959     c->cpuCaps= cpuCaps;
 960     if(cpuCaps&PP_FORMAT){
 961         c->hChromaSubSample= cpuCaps&0x3;
 962         c->vChromaSubSample= (cpuCaps>>4)&0x3;
 963     }else{
 964         c->hChromaSubSample= 1;
 965         c->vChromaSubSample= 1;
 966     }
 967
 968     reallocBuffers(c, width, height, stride, qpStride);
 969
 970     c->frameNum=-1;
 971
 972     return c;
 973 }
 974
 975 void pp_free_context(void *vc){
 976     PPContext *c = (PPContext*)vc;
 977     int i;
 978
 979     for(i=0; i<3; i++) av_free(c->tempBlurred[i]);
 980     for(i=0; i<3; i++) av_free(c->tempBlurredPast[i]);
 981
 982     av_free(c->tempBlocks);
 983     av_free(c->yHistogram);
 984     av_free(c->tempDst);
 985     av_free(c->tempSrc);
 986     av_free(c->deintTemp);
 987     av_free(c->stdQPTable);
 988     av_free(c->nonBQPTable);
 989     av_free(c->forcedQPTable);
 990
 991     memset(c, 0, sizeof(PPContext));
 992
 993     av_free(c);
 994 }
 995
 996 void  pp_postprocess(const uint8_t * src[3], const int srcStride[3],
 997                      uint8_t * dst[3], const int dstStride[3],
 998                      int width, int height,
 999                      const QP_STORE_T *QP_store,  int QPStride,
1000                      pp_mode *vm,  void *vc, int pict_type)
1001 {
1002     int mbWidth = (width+15)>>4;
1003     int mbHeight= (height+15)>>4;
1004     PPMode *mode = (PPMode*)vm;
1005     PPContext *c = (PPContext*)vc;
1006     int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
1007     int absQPStride = FFABS(QPStride);
1008
1009     // c->stride and c->QPStride are always positive
1010     if(c->stride < minStride || c->qpStride < absQPStride)
1011         reallocBuffers(c, width, height,
1012                        FFMAX(minStride, c->stride),
1013                        FFMAX(c->qpStride, absQPStride));
1014
1015     if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)){
1016         int i;
1017         QP_store= c->forcedQPTable;
1018         absQPStride = QPStride = 0;
1019         if(mode->lumMode & FORCE_QUANT)
1020             for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= mode->forcedQuant;
1021         else
1022             for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= 1;
1023     }
1024
1025     if(pict_type & PP_PICT_TYPE_QP2){
1026         int i;
1027         const int count= mbHeight * absQPStride;
1028         for(i=0; i<(count>>2); i++){
1029             ((uint32_t*)c->stdQPTable)[i] = (((const uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
1030         }
1031         for(i<<=2; i<count; i++){
1032             c->stdQPTable[i] = QP_store[i]>>1;
1033         }
1034         QP_store= c->stdQPTable;
1035         QPStride= absQPStride;
1036     }
1037
1038     if(0){
1039         int x,y;
1040         for(y=0; y<mbHeight; y++){
1041             for(x=0; x<mbWidth; x++){
1042                 av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
1043             }
1044             av_log(c, AV_LOG_INFO, "\n");
1045         }
1046         av_log(c, AV_LOG_INFO, "\n");
1047     }
1048
1049     if((pict_type&7)!=3){
1050         if (QPStride >= 0){
1051             int i;
1052             const int count= mbHeight * QPStride;
1053             for(i=0; i<(count>>2); i++){
1054                 ((uint32_t*)c->nonBQPTable)[i] = ((const uint32_t*)QP_store)[i] & 0x3F3F3F3F;
1055             }
1056             for(i<<=2; i<count; i++){
1057                 c->nonBQPTable[i] = QP_store[i] & 0x3F;
1058             }
1059         } else {
1060             int i,j;
1061             for(i=0; i<mbHeight; i++) {
1062                 for(j=0; j<absQPStride; j++) {
1063                     c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
1064                 }
1065             }
1066         }
1067     }
1068
1069     av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
1070            mode->lumMode, mode->chromMode);
1071
1072     postProcess(src[0], srcStride[0], dst[0], dstStride[0],
1073                 width, height, QP_store, QPStride, 0, mode, c);
1074
1075     width  = (width )>>c->hChromaSubSample;
1076     height = (height)>>c->vChromaSubSample;
1077
1078     if(mode->chromMode){
1079         postProcess(src[1], srcStride[1], dst[1], dstStride[1],
1080                     width, height, QP_store, QPStride, 1, mode, c);
1081         postProcess(src[2], srcStride[2], dst[2], dstStride[2],
1082                     width, height, QP_store, QPStride, 2, mode, c);
1083     }
1084     else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]){
1085         linecpy(dst[1], src[1], height, srcStride[1]);
1086         linecpy(dst[2], src[2], height, srcStride[2]);
1087     }else{
1088         int y;
1089         for(y=0; y<height; y++){
1090             memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
1091             memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
1092         }
1093     }
1094 }
1095