2 * Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
4 * AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
6 * This file is part of FFmpeg.
8 * FFmpeg is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * @file libpostproc/postprocess.c
29 C MMX MMX2 3DNow AltiVec
31 isVertMinMaxOk Ec Ec Ec
32 doVertLowPass E e e Ec
33 doVertDefFilter Ec Ec e e Ec
35 isHorizMinMaxOk a E Ec
36 doHorizLowPass E e e Ec
37 doHorizDefFilter Ec Ec e e Ec
38 do_a_deblock Ec E Ec E
40 Vertical RKAlgo1 E a a
41 Horizontal RKAlgo1 a a
44 LinIpolDeinterlace e E E*
45 CubicIpolDeinterlace a e e*
46 LinBlendDeinterlace e E E*
47 MedianDeinterlace# E Ec Ec
48 TempDeNoiser# E e e Ec
50 * I do not have a 3DNow! CPU -> it is untested, but no one said it does not work so it seems to work
51 # more or less selfinvented filters so the exactness is not too meaningful
52 E = Exact implementation
53 e = almost exact implementation (slightly different rounding,...)
54 a = alternative / approximate impl
55 c = checked against the other implementations (-vo md5)
56 p = partially optimized, still some work to do
61 reduce the time wasted on the mem transfer
62 unroll stuff if instructions depend too much on the prior one
63 move YScale thing to the end instead of fixing QP
64 write a faster and higher quality deblocking filter :)
65 make the mainloop more flexible (variable number of blocks at once
66 (the if/else stuff per block is slowing things down)
67 compare the quality & speed of all filters
70 try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
74 //Changelog: use the Subversion log
77 #include "libavutil/avutil.h"
83 //#define HAVE_AMD3DNOW
86 //#define DEBUG_BRIGHTNESS
87 #include "postprocess.h"
88 #include "postprocess_internal.h"
90 unsigned postproc_version(void)
92 return LIBPOSTPROC_VERSION_INT
;
99 #define GET_MODE_BUFFER_SIZE 500
100 #define OPTIONS_ARRAY_SIZE 10
102 #define TEMP_STRIDE 8
103 //#define NUM_BLOCKS_AT_ONCE 16 //not used yet
106 DECLARE_ASM_CONST(8, uint64_t, w05
)= 0x0005000500050005LL
;
107 DECLARE_ASM_CONST(8, uint64_t, w04
)= 0x0004000400040004LL
;
108 DECLARE_ASM_CONST(8, uint64_t, w20
)= 0x0020002000200020LL
;
109 DECLARE_ASM_CONST(8, uint64_t, b00
)= 0x0000000000000000LL
;
110 DECLARE_ASM_CONST(8, uint64_t, b01
)= 0x0101010101010101LL
;
111 DECLARE_ASM_CONST(8, uint64_t, b02
)= 0x0202020202020202LL
;
112 DECLARE_ASM_CONST(8, uint64_t, b08
)= 0x0808080808080808LL
;
113 DECLARE_ASM_CONST(8, uint64_t, b80
)= 0x8080808080808080LL
;
116 DECLARE_ASM_CONST(8, int, deringThreshold
)= 20;
119 static struct PPFilter filters
[]=
121 {"hb", "hdeblock", 1, 1, 3, H_DEBLOCK
},
122 {"vb", "vdeblock", 1, 2, 4, V_DEBLOCK
},
123 /* {"hr", "rkhdeblock", 1, 1, 3, H_RK1_FILTER},
124 {"vr", "rkvdeblock", 1, 2, 4, V_RK1_FILTER},*/
125 {"h1", "x1hdeblock", 1, 1, 3, H_X1_FILTER
},
126 {"v1", "x1vdeblock", 1, 2, 4, V_X1_FILTER
},
127 {"ha", "ahdeblock", 1, 1, 3, H_A_DEBLOCK
},
128 {"va", "avdeblock", 1, 2, 4, V_A_DEBLOCK
},
129 {"dr", "dering", 1, 5, 6, DERING
},
130 {"al", "autolevels", 0, 1, 2, LEVEL_FIX
},
131 {"lb", "linblenddeint", 1, 1, 4, LINEAR_BLEND_DEINT_FILTER
},
132 {"li", "linipoldeint", 1, 1, 4, LINEAR_IPOL_DEINT_FILTER
},
133 {"ci", "cubicipoldeint", 1, 1, 4, CUBIC_IPOL_DEINT_FILTER
},
134 {"md", "mediandeint", 1, 1, 4, MEDIAN_DEINT_FILTER
},
135 {"fd", "ffmpegdeint", 1, 1, 4, FFMPEG_DEINT_FILTER
},
136 {"l5", "lowpass5", 1, 1, 4, LOWPASS5_DEINT_FILTER
},
137 {"tn", "tmpnoise", 1, 7, 8, TEMP_NOISE_FILTER
},
138 {"fq", "forcequant", 1, 0, 0, FORCE_QUANT
},
139 {NULL
, NULL
,0,0,0,0} //End Marker
/* Aliases expanded while parsing a mode string.
 * Layout: pairs of (alias, replacement filter list). The parser walks the
 * table two entries at a time until it finds a NULL alias, so the table
 * MUST end with a NULL entry. */
static const char *replaceTable[] =
{
    "default", "hb:a,vb:a,dr:a",
    "de",      "hb:a,vb:a,dr:a",
    "fast",    "h1:a,v1:a,dr:a",
    "fa",      "h1:a,v1:a,dr:a",
    "ac",      "ha:a:128:7,va:a,dr:a",
    NULL //End Marker
};
/* Thin wrappers, one per x86 prefetch instruction. Each issues the
 * instruction with the address p as its memory operand and has no other
 * effect visible in C. */

/** Issue "prefetchnta" for the address p. */
static inline void prefetchnta(void *p)
{
    __asm__ volatile("prefetchnta (%0)\n\t"
        : /* no outputs */
        : "r" (p)
    );
}

/** Issue "prefetcht0" for the address p. */
static inline void prefetcht0(void *p)
{
    __asm__ volatile("prefetcht0 (%0)\n\t"
        : /* no outputs */
        : "r" (p)
    );
}

/** Issue "prefetcht1" for the address p. */
static inline void prefetcht1(void *p)
{
    __asm__ volatile("prefetcht1 (%0)\n\t"
        : /* no outputs */
        : "r" (p)
    );
}

/** Issue "prefetcht2" for the address p. */
static inline void prefetcht2(void *p)
{
    __asm__ volatile("prefetcht2 (%0)\n\t"
        : /* no outputs */
        : "r" (p)
    );
}
183 /* The horizontal functions exist only in C because the MMX
184 * code is faster with vertical filters and transposing. */
187 * Check if the given 8x8 Block is mostly "flat"
189 static inline int isHorizDC_C(uint8_t src
[], int stride
, PPContext
*c
)
193 const int dcOffset
= ((c
->nonBQP
*c
->ppMode
.baseDcDiff
)>>8) + 1;
194 const int dcThreshold
= dcOffset
*2 + 1;
196 for(y
=0; y
<BLOCK_SIZE
; y
++){
197 if(((unsigned)(src
[0] - src
[1] + dcOffset
)) < dcThreshold
) numEq
++;
198 if(((unsigned)(src
[1] - src
[2] + dcOffset
)) < dcThreshold
) numEq
++;
199 if(((unsigned)(src
[2] - src
[3] + dcOffset
)) < dcThreshold
) numEq
++;
200 if(((unsigned)(src
[3] - src
[4] + dcOffset
)) < dcThreshold
) numEq
++;
201 if(((unsigned)(src
[4] - src
[5] + dcOffset
)) < dcThreshold
) numEq
++;
202 if(((unsigned)(src
[5] - src
[6] + dcOffset
)) < dcThreshold
) numEq
++;
203 if(((unsigned)(src
[6] - src
[7] + dcOffset
)) < dcThreshold
) numEq
++;
206 return numEq
> c
->ppMode
.flatnessThreshold
;
210 * Check if the middle 8x8 Block in the given 8x16 block is flat
212 static inline int isVertDC_C(uint8_t src
[], int stride
, PPContext
*c
)
216 const int dcOffset
= ((c
->nonBQP
*c
->ppMode
.baseDcDiff
)>>8) + 1;
217 const int dcThreshold
= dcOffset
*2 + 1;
219 src
+= stride
*4; // src points to begin of the 8x8 Block
220 for(y
=0; y
<BLOCK_SIZE
-1; y
++){
221 if(((unsigned)(src
[0] - src
[0+stride
] + dcOffset
)) < dcThreshold
) numEq
++;
222 if(((unsigned)(src
[1] - src
[1+stride
] + dcOffset
)) < dcThreshold
) numEq
++;
223 if(((unsigned)(src
[2] - src
[2+stride
] + dcOffset
)) < dcThreshold
) numEq
++;
224 if(((unsigned)(src
[3] - src
[3+stride
] + dcOffset
)) < dcThreshold
) numEq
++;
225 if(((unsigned)(src
[4] - src
[4+stride
] + dcOffset
)) < dcThreshold
) numEq
++;
226 if(((unsigned)(src
[5] - src
[5+stride
] + dcOffset
)) < dcThreshold
) numEq
++;
227 if(((unsigned)(src
[6] - src
[6+stride
] + dcOffset
)) < dcThreshold
) numEq
++;
228 if(((unsigned)(src
[7] - src
[7+stride
] + dcOffset
)) < dcThreshold
) numEq
++;
231 return numEq
> c
->ppMode
.flatnessThreshold
;
234 static inline int isHorizMinMaxOk_C(uint8_t src
[], int stride
, int QP
)
239 if((unsigned)(src
[0] - src
[5] + 2*QP
) > 4*QP
) return 0;
241 if((unsigned)(src
[2] - src
[7] + 2*QP
) > 4*QP
) return 0;
243 if((unsigned)(src
[4] - src
[1] + 2*QP
) > 4*QP
) return 0;
245 if((unsigned)(src
[6] - src
[3] + 2*QP
) > 4*QP
) return 0;
250 if((unsigned)(src
[0] - src
[7] + 2*QP
) > 4*QP
) return 0;
257 static inline int isVertMinMaxOk_C(uint8_t src
[], int stride
, int QP
)
263 for(x
=0; x
<BLOCK_SIZE
; x
+=4){
264 if((unsigned)(src
[ x
+ 0*stride
] - src
[ x
+ 5*stride
] + 2*QP
) > 4*QP
) return 0;
265 if((unsigned)(src
[1+x
+ 2*stride
] - src
[1+x
+ 7*stride
] + 2*QP
) > 4*QP
) return 0;
266 if((unsigned)(src
[2+x
+ 4*stride
] - src
[2+x
+ 1*stride
] + 2*QP
) > 4*QP
) return 0;
267 if((unsigned)(src
[3+x
+ 6*stride
] - src
[3+x
+ 3*stride
] + 2*QP
) > 4*QP
) return 0;
272 for(x
=0; x
<BLOCK_SIZE
; x
++){
273 if((unsigned)(src
[x
+ stride
] - src
[x
+ (stride
<<3)] + 2*QP
) > 4*QP
) return 0;
280 for(x
=0; x
<BLOCK_SIZE
; x
++){
285 int v
= src
[x
+ y
*stride
];
289 if(max
-min
> 2*QP
) return 0;
295 static inline int horizClassify_C(uint8_t src
[], int stride
, PPContext
*c
)
297 if( isHorizDC_C(src
, stride
, c
) ){
298 if( isHorizMinMaxOk_C(src
, stride
, c
->QP
) )
307 static inline int vertClassify_C(uint8_t src
[], int stride
, PPContext
*c
)
309 if( isVertDC_C(src
, stride
, c
) ){
310 if( isVertMinMaxOk_C(src
, stride
, c
->QP
) )
319 static inline void doHorizDefFilter_C(uint8_t dst
[], int stride
, PPContext
*c
)
322 for(y
=0; y
<BLOCK_SIZE
; y
++){
323 const int middleEnergy
= 5*(dst
[4] - dst
[3]) + 2*(dst
[2] - dst
[5]);
325 if(FFABS(middleEnergy
) < 8*c
->QP
){
326 const int q
=(dst
[3] - dst
[4])/2;
327 const int leftEnergy
= 5*(dst
[2] - dst
[1]) + 2*(dst
[0] - dst
[3]);
328 const int rightEnergy
= 5*(dst
[6] - dst
[5]) + 2*(dst
[4] - dst
[7]);
330 int d
= FFABS(middleEnergy
) - FFMIN( FFABS(leftEnergy
), FFABS(rightEnergy
) );
334 d
*= FFSIGN(-middleEnergy
);
355 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
356 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
358 static inline void doHorizLowPass_C(uint8_t dst
[], int stride
, PPContext
*c
)
361 for(y
=0; y
<BLOCK_SIZE
; y
++){
362 const int first
= FFABS(dst
[-1] - dst
[0]) < c
->QP
? dst
[-1] : dst
[0];
363 const int last
= FFABS(dst
[8] - dst
[7]) < c
->QP
? dst
[8] : dst
[7];
366 sums
[0] = 4*first
+ dst
[0] + dst
[1] + dst
[2] + 4;
367 sums
[1] = sums
[0] - first
+ dst
[3];
368 sums
[2] = sums
[1] - first
+ dst
[4];
369 sums
[3] = sums
[2] - first
+ dst
[5];
370 sums
[4] = sums
[3] - first
+ dst
[6];
371 sums
[5] = sums
[4] - dst
[0] + dst
[7];
372 sums
[6] = sums
[5] - dst
[1] + last
;
373 sums
[7] = sums
[6] - dst
[2] + last
;
374 sums
[8] = sums
[7] - dst
[3] + last
;
375 sums
[9] = sums
[8] - dst
[4] + last
;
377 dst
[0]= (sums
[0] + sums
[2] + 2*dst
[0])>>4;
378 dst
[1]= (sums
[1] + sums
[3] + 2*dst
[1])>>4;
379 dst
[2]= (sums
[2] + sums
[4] + 2*dst
[2])>>4;
380 dst
[3]= (sums
[3] + sums
[5] + 2*dst
[3])>>4;
381 dst
[4]= (sums
[4] + sums
[6] + 2*dst
[4])>>4;
382 dst
[5]= (sums
[5] + sums
[7] + 2*dst
[5])>>4;
383 dst
[6]= (sums
[6] + sums
[8] + 2*dst
[6])>>4;
384 dst
[7]= (sums
[7] + sums
[9] + 2*dst
[7])>>4;
391 * Experimental Filter 1 (Horizontal)
392 * will not damage linear gradients
393 * Flat blocks should look like they were passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
394 * can only smooth blocks at the expected locations (it cannot smooth them if they did move)
395 * The MMX2 version does correct clipping; the C version does not.
396 * not identical with the vertical one
398 static inline void horizX1Filter(uint8_t *src
, int stride
, int QP
)
401 static uint64_t *lut
= NULL
;
405 lut
= av_malloc(256*8);
408 int v
= i
< 128 ? 2*i
: 2*(i
-256);
410 //Simulate 112242211 9-Tap filter
411 uint64_t a= (v/16) & 0xFF;
412 uint64_t b= (v/8) & 0xFF;
413 uint64_t c= (v/4) & 0xFF;
414 uint64_t d= (3*v/8) & 0xFF;
416 //Simulate piecewise linear interpolation
417 uint64_t a
= (v
/16) & 0xFF;
418 uint64_t b
= (v
*3/16) & 0xFF;
419 uint64_t c
= (v
*5/16) & 0xFF;
420 uint64_t d
= (7*v
/16) & 0xFF;
421 uint64_t A
= (0x100 - a
)&0xFF;
422 uint64_t B
= (0x100 - b
)&0xFF;
423 uint64_t C
= (0x100 - c
)&0xFF;
424 uint64_t D
= (0x100 - c
)&0xFF;
426 lut
[i
] = (a
<<56) | (b
<<48) | (c
<<40) | (d
<<32) |
427 (D
<<24) | (C
<<16) | (B
<<8) | (A
);
428 //lut[i] = (v<<32) | (v<<24);
432 for(y
=0; y
<BLOCK_SIZE
; y
++){
433 int a
= src
[1] - src
[2];
434 int b
= src
[3] - src
[4];
435 int c
= src
[5] - src
[6];
437 int d
= FFMAX(FFABS(b
) - (FFABS(a
) + FFABS(c
))/2, 0);
440 int v
= d
* FFSIGN(-b
);
454 * accurate deblock filter
456 static av_always_inline
void do_a_deblock_C(uint8_t *src
, int step
, int stride
, PPContext
*c
){
459 const int dcOffset
= ((c
->nonBQP
*c
->ppMode
.baseDcDiff
)>>8) + 1;
460 const int dcThreshold
= dcOffset
*2 + 1;
462 src
+= step
*4; // src points to begin of the 8x8 Block
466 if(((unsigned)(src
[-1*step
] - src
[0*step
] + dcOffset
)) < dcThreshold
) numEq
++;
467 if(((unsigned)(src
[ 0*step
] - src
[1*step
] + dcOffset
)) < dcThreshold
) numEq
++;
468 if(((unsigned)(src
[ 1*step
] - src
[2*step
] + dcOffset
)) < dcThreshold
) numEq
++;
469 if(((unsigned)(src
[ 2*step
] - src
[3*step
] + dcOffset
)) < dcThreshold
) numEq
++;
470 if(((unsigned)(src
[ 3*step
] - src
[4*step
] + dcOffset
)) < dcThreshold
) numEq
++;
471 if(((unsigned)(src
[ 4*step
] - src
[5*step
] + dcOffset
)) < dcThreshold
) numEq
++;
472 if(((unsigned)(src
[ 5*step
] - src
[6*step
] + dcOffset
)) < dcThreshold
) numEq
++;
473 if(((unsigned)(src
[ 6*step
] - src
[7*step
] + dcOffset
)) < dcThreshold
) numEq
++;
474 if(((unsigned)(src
[ 7*step
] - src
[8*step
] + dcOffset
)) < dcThreshold
) numEq
++;
475 if(numEq
> c
->ppMode
.flatnessThreshold
){
478 if(src
[0] > src
[step
]){
486 if(src
[x
*step
] > src
[(x
+1)*step
]){
487 if(src
[x
*step
] > max
) max
= src
[ x
*step
];
488 if(src
[(x
+1)*step
] < min
) min
= src
[(x
+1)*step
];
490 if(src
[(x
+1)*step
] > max
) max
= src
[(x
+1)*step
];
491 if(src
[ x
*step
] < min
) min
= src
[ x
*step
];
495 const int first
= FFABS(src
[-1*step
] - src
[0]) < QP
? src
[-1*step
] : src
[0];
496 const int last
= FFABS(src
[8*step
] - src
[7*step
]) < QP
? src
[8*step
] : src
[7*step
];
499 sums
[0] = 4*first
+ src
[0*step
] + src
[1*step
] + src
[2*step
] + 4;
500 sums
[1] = sums
[0] - first
+ src
[3*step
];
501 sums
[2] = sums
[1] - first
+ src
[4*step
];
502 sums
[3] = sums
[2] - first
+ src
[5*step
];
503 sums
[4] = sums
[3] - first
+ src
[6*step
];
504 sums
[5] = sums
[4] - src
[0*step
] + src
[7*step
];
505 sums
[6] = sums
[5] - src
[1*step
] + last
;
506 sums
[7] = sums
[6] - src
[2*step
] + last
;
507 sums
[8] = sums
[7] - src
[3*step
] + last
;
508 sums
[9] = sums
[8] - src
[4*step
] + last
;
510 src
[0*step
]= (sums
[0] + sums
[2] + 2*src
[0*step
])>>4;
511 src
[1*step
]= (sums
[1] + sums
[3] + 2*src
[1*step
])>>4;
512 src
[2*step
]= (sums
[2] + sums
[4] + 2*src
[2*step
])>>4;
513 src
[3*step
]= (sums
[3] + sums
[5] + 2*src
[3*step
])>>4;
514 src
[4*step
]= (sums
[4] + sums
[6] + 2*src
[4*step
])>>4;
515 src
[5*step
]= (sums
[5] + sums
[7] + 2*src
[5*step
])>>4;
516 src
[6*step
]= (sums
[6] + sums
[8] + 2*src
[6*step
])>>4;
517 src
[7*step
]= (sums
[7] + sums
[9] + 2*src
[7*step
])>>4;
520 const int middleEnergy
= 5*(src
[4*step
] - src
[3*step
]) + 2*(src
[2*step
] - src
[5*step
]);
522 if(FFABS(middleEnergy
) < 8*QP
){
523 const int q
=(src
[3*step
] - src
[4*step
])/2;
524 const int leftEnergy
= 5*(src
[2*step
] - src
[1*step
]) + 2*(src
[0*step
] - src
[3*step
]);
525 const int rightEnergy
= 5*(src
[6*step
] - src
[5*step
]) + 2*(src
[4*step
] - src
[7*step
]);
527 int d
= FFABS(middleEnergy
) - FFMIN( FFABS(leftEnergy
), FFABS(rightEnergy
) );
531 d
*= FFSIGN(-middleEnergy
);
555 //Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one
557 #if !(HAVE_MMX || HAVE_ALTIVEC) || defined (RUNTIME_CPUDETECT)
562 #define COMPILE_ALTIVEC
563 #endif //HAVE_ALTIVEC
567 #if (HAVE_MMX && !HAVE_AMD3DNOW && !HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
571 #if HAVE_MMX2 || defined (RUNTIME_CPUDETECT)
575 #if (HAVE_AMD3DNOW && !HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
576 #define COMPILE_3DNOW
578 #endif /* ARCH_X86 */
585 #define HAVE_AMD3DNOW 0
587 #define HAVE_ALTIVEC 0
590 #define RENAME(a) a ## _C
591 #include "postprocess_template.c"
594 #ifdef COMPILE_ALTIVEC
597 #define HAVE_ALTIVEC 1
598 #define RENAME(a) a ## _altivec
599 #include "postprocess_altivec_template.c"
600 #include "postprocess_template.c"
608 #define RENAME(a) a ## _MMX
609 #include "postprocess_template.c"
619 #define RENAME(a) a ## _MMX2
620 #include "postprocess_template.c"
631 #define HAVE_AMD3DNOW 1
632 #define RENAME(a) a ## _3DNow
633 #include "postprocess_template.c"
636 // minor note: the HAVE_xyz is messed up after that line so do not use it.
638 static inline void postProcess(const uint8_t src
[], int srcStride
, uint8_t dst
[], int dstStride
, int width
, int height
,
639 const QP_STORE_T QPs
[], int QPStride
, int isColor
, pp_mode
*vm
, pp_context
*vc
)
641 PPContext
*c
= (PPContext
*)vc
;
642 PPMode
*ppMode
= (PPMode
*)vm
;
643 c
->ppMode
= *ppMode
; //FIXME
645 // Using ifs here as they are faster than function pointers although the
646 // difference would not be measurable here but it is much better because
647 // someone might exchange the CPU without restarting MPlayer ;)
648 #ifdef RUNTIME_CPUDETECT
650 // ordered per speed fastest first
651 if(c
->cpuCaps
& PP_CPU_CAPS_MMX2
)
652 postProcess_MMX2(src
, srcStride
, dst
, dstStride
, width
, height
, QPs
, QPStride
, isColor
, c
);
653 else if(c
->cpuCaps
& PP_CPU_CAPS_3DNOW
)
654 postProcess_3DNow(src
, srcStride
, dst
, dstStride
, width
, height
, QPs
, QPStride
, isColor
, c
);
655 else if(c
->cpuCaps
& PP_CPU_CAPS_MMX
)
656 postProcess_MMX(src
, srcStride
, dst
, dstStride
, width
, height
, QPs
, QPStride
, isColor
, c
);
658 postProcess_C(src
, srcStride
, dst
, dstStride
, width
, height
, QPs
, QPStride
, isColor
, c
);
661 if(c
->cpuCaps
& PP_CPU_CAPS_ALTIVEC
)
662 postProcess_altivec(src
, srcStride
, dst
, dstStride
, width
, height
, QPs
, QPStride
, isColor
, c
);
665 postProcess_C(src
, srcStride
, dst
, dstStride
, width
, height
, QPs
, QPStride
, isColor
, c
);
667 #else //RUNTIME_CPUDETECT
669 postProcess_MMX2(src
, srcStride
, dst
, dstStride
, width
, height
, QPs
, QPStride
, isColor
, c
);
671 postProcess_3DNow(src
, srcStride
, dst
, dstStride
, width
, height
, QPs
, QPStride
, isColor
, c
);
673 postProcess_MMX(src
, srcStride
, dst
, dstStride
, width
, height
, QPs
, QPStride
, isColor
, c
);
675 postProcess_altivec(src
, srcStride
, dst
, dstStride
, width
, height
, QPs
, QPStride
, isColor
, c
);
677 postProcess_C(src
, srcStride
, dst
, dstStride
, width
, height
, QPs
, QPStride
, isColor
, c
);
679 #endif //!RUNTIME_CPUDETECT
682 //static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
683 // QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
685 /* -pp Command line Help
687 #if LIBPOSTPROC_VERSION_INT < (52<<16)
688 const char *const pp_help
=
690 const char pp_help
[] =
692 "Available postprocessing filters:\n"
694 "short long name short long option Description\n"
695 "* * a autoq CPU power dependent enabler\n"
696 " c chrom chrominance filtering enabled\n"
697 " y nochrom chrominance filtering disabled\n"
698 " n noluma luma filtering disabled\n"
699 "hb hdeblock (2 threshold) horizontal deblocking filter\n"
700 " 1. difference factor: default=32, higher -> more deblocking\n"
701 " 2. flatness threshold: default=39, lower -> more deblocking\n"
702 " the h & v deblocking filters share these\n"
703 " so you can't set different thresholds for h / v\n"
704 "vb vdeblock (2 threshold) vertical deblocking filter\n"
705 "ha hadeblock (2 threshold) horizontal deblocking filter\n"
706 "va vadeblock (2 threshold) vertical deblocking filter\n"
707 "h1 x1hdeblock experimental h deblock filter 1\n"
708 "v1 x1vdeblock experimental v deblock filter 1\n"
709 "dr dering deringing filter\n"
710 "al autolevels automatic brightness / contrast\n"
711 " f fullyrange stretch luminance to (0..255)\n"
712 "lb linblenddeint linear blend deinterlacer\n"
713 "li linipoldeint linear interpolating deinterlace\n"
714 "ci cubicipoldeint cubic interpolating deinterlacer\n"
715 "md mediandeint median deinterlacer\n"
716 "fd ffmpegdeint ffmpeg deinterlacer\n"
717 "l5 lowpass5 FIR lowpass deinterlacer\n"
718 "de default hb:a,vb:a,dr:a\n"
719 "fa fast h1:a,v1:a,dr:a\n"
720 "ac ha:a:128:7,va:a,dr:a\n"
721 "tn tmpnoise (3 threshold) temporal noise reducer\n"
722 " 1. <= 2. <= 3. larger -> stronger filtering\n"
723 "fq forceQuant <quantizer> force quantizer\n"
725 "<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
726 "long form example:\n"
727 "vdeblock:autoq/hdeblock:autoq/linblenddeint default,-vdeblock\n"
728 "short form example:\n"
729 "vb:a/hb:a/lb de,-vb\n"
735 pp_mode
*pp_get_mode_by_name_and_quality(const char *name
, int quality
)
737 char temp
[GET_MODE_BUFFER_SIZE
];
739 static const char filterDelimiters
[] = ",/";
740 static const char optionDelimiters
[] = ":";
741 struct PPMode
*ppMode
;
744 ppMode
= av_malloc(sizeof(PPMode
));
747 ppMode
->chromMode
= 0;
748 ppMode
->maxTmpNoise
[0]= 700;
749 ppMode
->maxTmpNoise
[1]= 1500;
750 ppMode
->maxTmpNoise
[2]= 3000;
751 ppMode
->maxAllowedY
= 234;
752 ppMode
->minAllowedY
= 16;
753 ppMode
->baseDcDiff
= 256/8;
754 ppMode
->flatnessThreshold
= 56-16-1;
755 ppMode
->maxClippedThreshold
= 0.01;
758 strncpy(temp
, name
, GET_MODE_BUFFER_SIZE
);
760 av_log(NULL
, AV_LOG_DEBUG
, "pp: %s\n", name
);
764 int q
= 1000000; //PP_QUALITY_MAX;
768 char *options
[OPTIONS_ARRAY_SIZE
];
771 int numOfUnknownOptions
=0;
772 int enable
=1; //does the user want us to enabled or disabled the filter
774 filterToken
= strtok(p
, filterDelimiters
);
775 if(filterToken
== NULL
) break;
776 p
+= strlen(filterToken
) + 1; // p points to next filterToken
777 filterName
= strtok(filterToken
, optionDelimiters
);
778 av_log(NULL
, AV_LOG_DEBUG
, "pp: %s::%s\n", filterToken
, filterName
);
780 if(*filterName
== '-'){
785 for(;;){ //for all options
786 option
= strtok(NULL
, optionDelimiters
);
787 if(option
== NULL
) break;
789 av_log(NULL
, AV_LOG_DEBUG
, "pp: option: %s\n", option
);
790 if(!strcmp("autoq", option
) || !strcmp("a", option
)) q
= quality
;
791 else if(!strcmp("nochrom", option
) || !strcmp("y", option
)) chrom
=0;
792 else if(!strcmp("chrom", option
) || !strcmp("c", option
)) chrom
=1;
793 else if(!strcmp("noluma", option
) || !strcmp("n", option
)) luma
=0;
795 options
[numOfUnknownOptions
] = option
;
796 numOfUnknownOptions
++;
798 if(numOfUnknownOptions
>= OPTIONS_ARRAY_SIZE
-1) break;
800 options
[numOfUnknownOptions
] = NULL
;
802 /* replace stuff from the replace Table */
803 for(i
=0; replaceTable
[2*i
]!=NULL
; i
++){
804 if(!strcmp(replaceTable
[2*i
], filterName
)){
805 int newlen
= strlen(replaceTable
[2*i
+ 1]);
809 if(p
==NULL
) p
= temp
, *p
=0; //last filter
810 else p
--, *p
=','; //not last filter
813 spaceLeft
= p
- temp
+ plen
;
814 if(spaceLeft
+ newlen
>= GET_MODE_BUFFER_SIZE
){
818 memmove(p
+ newlen
, p
, plen
+1);
819 memcpy(p
, replaceTable
[2*i
+ 1], newlen
);
824 for(i
=0; filters
[i
].shortName
!=NULL
; i
++){
825 if( !strcmp(filters
[i
].longName
, filterName
)
826 || !strcmp(filters
[i
].shortName
, filterName
)){
827 ppMode
->lumMode
&= ~filters
[i
].mask
;
828 ppMode
->chromMode
&= ~filters
[i
].mask
;
831 if(!enable
) break; // user wants to disable it
833 if(q
>= filters
[i
].minLumQuality
&& luma
)
834 ppMode
->lumMode
|= filters
[i
].mask
;
835 if(chrom
==1 || (chrom
==-1 && filters
[i
].chromDefault
))
836 if(q
>= filters
[i
].minChromQuality
)
837 ppMode
->chromMode
|= filters
[i
].mask
;
839 if(filters
[i
].mask
== LEVEL_FIX
){
841 ppMode
->minAllowedY
= 16;
842 ppMode
->maxAllowedY
= 234;
843 for(o
=0; options
[o
]!=NULL
; o
++){
844 if( !strcmp(options
[o
],"fullyrange")
845 ||!strcmp(options
[o
],"f")){
846 ppMode
->minAllowedY
= 0;
847 ppMode
->maxAllowedY
= 255;
848 numOfUnknownOptions
--;
852 else if(filters
[i
].mask
== TEMP_NOISE_FILTER
)
857 for(o
=0; options
[o
]!=NULL
; o
++){
859 ppMode
->maxTmpNoise
[numOfNoises
]=
860 strtol(options
[o
], &tail
, 0);
861 if(tail
!=options
[o
]){
863 numOfUnknownOptions
--;
864 if(numOfNoises
>= 3) break;
868 else if(filters
[i
].mask
== V_DEBLOCK
|| filters
[i
].mask
== H_DEBLOCK
869 || filters
[i
].mask
== V_A_DEBLOCK
|| filters
[i
].mask
== H_A_DEBLOCK
){
872 for(o
=0; options
[o
]!=NULL
&& o
<2; o
++){
874 int val
= strtol(options
[o
], &tail
, 0);
875 if(tail
==options
[o
]) break;
877 numOfUnknownOptions
--;
878 if(o
==0) ppMode
->baseDcDiff
= val
;
879 else ppMode
->flatnessThreshold
= val
;
882 else if(filters
[i
].mask
== FORCE_QUANT
){
884 ppMode
->forcedQuant
= 15;
886 for(o
=0; options
[o
]!=NULL
&& o
<1; o
++){
888 int val
= strtol(options
[o
], &tail
, 0);
889 if(tail
==options
[o
]) break;
891 numOfUnknownOptions
--;
892 ppMode
->forcedQuant
= val
;
897 if(!filterNameOk
) ppMode
->error
++;
898 ppMode
->error
+= numOfUnknownOptions
;
901 av_log(NULL
, AV_LOG_DEBUG
, "pp: lumMode=%X, chromMode=%X\n", ppMode
->lumMode
, ppMode
->chromMode
);
903 av_log(NULL
, AV_LOG_ERROR
, "%d errors in postprocess string \"%s\"\n", ppMode
->error
, name
);
910 void pp_free_mode(pp_mode
*mode
){
/**
 * Replace the buffer referenced by *p with a new zero-filled one.
 *
 * The previous buffer is released first, so the function can be called
 * repeatedly on the same slot (reallocBuffers is re-run on a live context
 * when the strides grow) without leaking. The slot must hold NULL or a
 * pointer obtained from the av_malloc family; the context is zeroed before
 * its first use, which satisfies this. Old contents are NOT preserved.
 *
 * @param p         address of the pointer slot to refresh
 * @param alignment requested alignment; not used by this implementation
 * @param size      number of bytes to allocate
 *
 * On allocation failure *p is left NULL (the result of av_mallocz is stored
 * unchecked, as there is no way to report an error from here).
 */
static void reallocAlign(void **p, int alignment, int size){
    av_free(*p);
    *p = av_mallocz(size);
}
919 static void reallocBuffers(PPContext
*c
, int width
, int height
, int stride
, int qpStride
){
920 int mbWidth
= (width
+15)>>4;
921 int mbHeight
= (height
+15)>>4;
925 c
->qpStride
= qpStride
;
927 reallocAlign((void **)&c
->tempDst
, 8, stride
*24);
928 reallocAlign((void **)&c
->tempSrc
, 8, stride
*24);
929 reallocAlign((void **)&c
->tempBlocks
, 8, 2*16*8);
930 reallocAlign((void **)&c
->yHistogram
, 8, 256*sizeof(uint64_t));
932 c
->yHistogram
[i
]= width
*height
/64*15/256;
935 //Note: The +17*1024 is just there so i do not have to worry about r/w over the end.
936 reallocAlign((void **)&c
->tempBlurred
[i
], 8, stride
*mbHeight
*16 + 17*1024);
937 reallocAlign((void **)&c
->tempBlurredPast
[i
], 8, 256*((height
+7)&(~7))/2 + 17*1024);//FIXME size
940 reallocAlign((void **)&c
->deintTemp
, 8, 2*width
+32);
941 reallocAlign((void **)&c
->nonBQPTable
, 8, qpStride
*mbHeight
*sizeof(QP_STORE_T
));
942 reallocAlign((void **)&c
->stdQPTable
, 8, qpStride
*mbHeight
*sizeof(QP_STORE_T
));
943 reallocAlign((void **)&c
->forcedQPTable
, 8, mbWidth
*sizeof(QP_STORE_T
));
946 static const char * context_to_name(void * ptr
) {
950 static const AVClass av_codec_context_class
= { "Postproc", context_to_name
, NULL
};
952 pp_context
*pp_get_context(int width
, int height
, int cpuCaps
){
953 PPContext
*c
= av_malloc(sizeof(PPContext
));
954 int stride
= (width
+15)&(~15); //assumed / will realloc if needed
955 int qpStride
= (width
+15)/16 + 2; //assumed / will realloc if needed
957 memset(c
, 0, sizeof(PPContext
));
958 c
->av_class
= &av_codec_context_class
;
960 if(cpuCaps
&PP_FORMAT
){
961 c
->hChromaSubSample
= cpuCaps
&0x3;
962 c
->vChromaSubSample
= (cpuCaps
>>4)&0x3;
964 c
->hChromaSubSample
= 1;
965 c
->vChromaSubSample
= 1;
968 reallocBuffers(c
, width
, height
, stride
, qpStride
);
975 void pp_free_context(void *vc
){
976 PPContext
*c
= (PPContext
*)vc
;
979 for(i
=0; i
<3; i
++) av_free(c
->tempBlurred
[i
]);
980 for(i
=0; i
<3; i
++) av_free(c
->tempBlurredPast
[i
]);
982 av_free(c
->tempBlocks
);
983 av_free(c
->yHistogram
);
986 av_free(c
->deintTemp
);
987 av_free(c
->stdQPTable
);
988 av_free(c
->nonBQPTable
);
989 av_free(c
->forcedQPTable
);
991 memset(c
, 0, sizeof(PPContext
));
996 void pp_postprocess(const uint8_t * src
[3], const int srcStride
[3],
997 uint8_t * dst
[3], const int dstStride
[3],
998 int width
, int height
,
999 const QP_STORE_T
*QP_store
, int QPStride
,
1000 pp_mode
*vm
, void *vc
, int pict_type
)
1002 int mbWidth
= (width
+15)>>4;
1003 int mbHeight
= (height
+15)>>4;
1004 PPMode
*mode
= (PPMode
*)vm
;
1005 PPContext
*c
= (PPContext
*)vc
;
1006 int minStride
= FFMAX(FFABS(srcStride
[0]), FFABS(dstStride
[0]));
1007 int absQPStride
= FFABS(QPStride
);
1009 // c->stride and c->QPStride are always positive
1010 if(c
->stride
< minStride
|| c
->qpStride
< absQPStride
)
1011 reallocBuffers(c
, width
, height
,
1012 FFMAX(minStride
, c
->stride
),
1013 FFMAX(c
->qpStride
, absQPStride
));
1015 if(QP_store
==NULL
|| (mode
->lumMode
& FORCE_QUANT
)){
1017 QP_store
= c
->forcedQPTable
;
1018 absQPStride
= QPStride
= 0;
1019 if(mode
->lumMode
& FORCE_QUANT
)
1020 for(i
=0; i
<mbWidth
; i
++) c
->forcedQPTable
[i
]= mode
->forcedQuant
;
1022 for(i
=0; i
<mbWidth
; i
++) c
->forcedQPTable
[i
]= 1;
1025 if(pict_type
& PP_PICT_TYPE_QP2
){
1027 const int count
= mbHeight
* absQPStride
;
1028 for(i
=0; i
<(count
>>2); i
++){
1029 ((uint32_t*)c
->stdQPTable
)[i
] = (((const uint32_t*)QP_store
)[i
]>>1) & 0x7F7F7F7F;
1031 for(i
<<=2; i
<count
; i
++){
1032 c
->stdQPTable
[i
] = QP_store
[i
]>>1;
1034 QP_store
= c
->stdQPTable
;
1035 QPStride
= absQPStride
;
1040 for(y
=0; y
<mbHeight
; y
++){
1041 for(x
=0; x
<mbWidth
; x
++){
1042 av_log(c
, AV_LOG_INFO
, "%2d ", QP_store
[x
+ y
*QPStride
]);
1044 av_log(c
, AV_LOG_INFO
, "\n");
1046 av_log(c
, AV_LOG_INFO
, "\n");
1049 if((pict_type
&7)!=3){
1052 const int count
= mbHeight
* QPStride
;
1053 for(i
=0; i
<(count
>>2); i
++){
1054 ((uint32_t*)c
->nonBQPTable
)[i
] = ((const uint32_t*)QP_store
)[i
] & 0x3F3F3F3F;
1056 for(i
<<=2; i
<count
; i
++){
1057 c
->nonBQPTable
[i
] = QP_store
[i
] & 0x3F;
1061 for(i
=0; i
<mbHeight
; i
++) {
1062 for(j
=0; j
<absQPStride
; j
++) {
1063 c
->nonBQPTable
[i
*absQPStride
+j
] = QP_store
[i
*QPStride
+j
] & 0x3F;
1069 av_log(c
, AV_LOG_DEBUG
, "using npp filters 0x%X/0x%X\n",
1070 mode
->lumMode
, mode
->chromMode
);
1072 postProcess(src
[0], srcStride
[0], dst
[0], dstStride
[0],
1073 width
, height
, QP_store
, QPStride
, 0, mode
, c
);
1075 width
= (width
)>>c
->hChromaSubSample
;
1076 height
= (height
)>>c
->vChromaSubSample
;
1078 if(mode
->chromMode
){
1079 postProcess(src
[1], srcStride
[1], dst
[1], dstStride
[1],
1080 width
, height
, QP_store
, QPStride
, 1, mode
, c
);
1081 postProcess(src
[2], srcStride
[2], dst
[2], dstStride
[2],
1082 width
, height
, QP_store
, QPStride
, 2, mode
, c
);
1084 else if(srcStride
[1] == dstStride
[1] && srcStride
[2] == dstStride
[2]){
1085 linecpy(dst
[1], src
[1], height
, srcStride
[1]);
1086 linecpy(dst
[2], src
[2], height
, srcStride
[2]);
1089 for(y
=0; y
<height
; y
++){
1090 memcpy(&(dst
[1][y
*dstStride
[1]]), &(src
[1][y
*srcStride
[1]]), width
);
1091 memcpy(&(dst
[2][y
*dstStride
[2]]), &(src
[2][y
*srcStride
[2]]), width
);