2 * Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
4 * AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
6 * This file is part of FFmpeg.
8 * FFmpeg is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
29 C MMX MMX2 3DNow AltiVec
31 isVertMinMaxOk Ec Ec Ec
32 doVertLowPass E e e Ec
33 doVertDefFilter Ec Ec e e Ec
35 isHorizMinMaxOk a E Ec
36 doHorizLowPass E e e Ec
37 doHorizDefFilter Ec Ec e e Ec
38 do_a_deblock Ec E Ec E
40 Vertical RKAlgo1 E a a
41 Horizontal RKAlgo1 a a
44 LinIpolDeinterlace e E E*
45 CubicIpolDeinterlace a e e*
46 LinBlendDeinterlace e E E*
47 MedianDeinterlace# E Ec Ec
48 TempDeNoiser# E e e Ec
50 * I do not have a 3DNow! CPU -> it is untested, but no one said it does not work so it seems to work
51 # more or less self-invented filters, so exactness is not too meaningful
52 E = Exact implementation
53 e = almost exact implementation (slightly different rounding,...)
54 a = alternative / approximate impl
55 c = checked against the other implementations (-vo md5)
56 p = partially optimized, still some work to do
61 reduce the time wasted on the mem transfer
62 unroll stuff if instructions depend too much on the prior one
63 move YScale thing to the end instead of fixing QP
64 write a faster and higher quality deblocking filter :)
65 make the mainloop more flexible (variable number of blocks at once
66 (the if/else stuff per block is slowing things down)
67 compare the quality & speed of all filters
70 try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
74 //Changelog: use the Subversion log
77 #include "libavutil/avutil.h"
89 //#define DEBUG_BRIGHTNESS
90 #include "postprocess.h"
91 #include "postprocess_internal.h"
97 #define GET_MODE_BUFFER_SIZE 500
98 #define OPTIONS_ARRAY_SIZE 10
100 #define TEMP_STRIDE 8
101 //#define NUM_BLOCKS_AT_ONCE 16 //not used yet
/*
 * Packed constants, each declared through DECLARE_ASM_CONST with 8 as its
 * first argument. The w* values repeat one 16-bit word four times
 * (w05 = 0x0005, w04 = 0x0004, w20 = 0x0020); the b* values repeat one byte
 * eight times (b00, b01, b02, b08, b80).
 * NOTE(review): presumably operands for the MMX inline-asm templates; nothing
 * in the visible part of this file references them -- confirm.
 * The #endif closing the ARCH_X86 guard is not visible in this listing.
 */
103 #if defined(ARCH_X86)
104 DECLARE_ASM_CONST(8, uint64_t, w05
)= 0x0005000500050005LL
;
105 DECLARE_ASM_CONST(8, uint64_t, w04
)= 0x0004000400040004LL
;
106 DECLARE_ASM_CONST(8, uint64_t, w20
)= 0x0020002000200020LL
;
107 DECLARE_ASM_CONST(8, uint64_t, b00
)= 0x0000000000000000LL
;
108 DECLARE_ASM_CONST(8, uint64_t, b01
)= 0x0101010101010101LL
;
109 DECLARE_ASM_CONST(8, uint64_t, b02
)= 0x0202020202020202LL
;
110 DECLARE_ASM_CONST(8, uint64_t, b08
)= 0x0808080808080808LL
;
111 DECLARE_ASM_CONST(8, uint64_t, b80
)= 0x8080808080808080LL
;
/* Going by its name, a threshold for the dering filter; its use is not visible here. */
114 DECLARE_ASM_CONST(8, int, deringThreshold
)= 20;
/*
 * Table of the filters that pp_get_mode_by_name_and_quality() can enable.
 * Each row gives a short name, a long name, three integers and a mask bit.
 * The parser in this file matches a user-supplied name against both
 * shortName and longName with strcmp() and then reads the chromDefault,
 * minLumQuality, minChromQuality and mask fields.
 * NOTE(review): the three integers are presumably chromDefault,
 * minLumQuality, minChromQuality in that order -- confirm against the
 * struct PPFilter declaration, which is not part of this file.
 * The row whose shortName is NULL terminates the table (the parser's loop
 * stops there). The opening and closing braces of the initializer are not
 * visible in this listing.
 */
117 static struct PPFilter filters
[]=
119 {"hb", "hdeblock", 1, 1, 3, H_DEBLOCK
},
120 {"vb", "vdeblock", 1, 2, 4, V_DEBLOCK
},
121 /* {"hr", "rkhdeblock", 1, 1, 3, H_RK1_FILTER},
122 {"vr", "rkvdeblock", 1, 2, 4, V_RK1_FILTER},*/
123 {"h1", "x1hdeblock", 1, 1, 3, H_X1_FILTER
},
124 {"v1", "x1vdeblock", 1, 2, 4, V_X1_FILTER
},
125 {"ha", "ahdeblock", 1, 1, 3, H_A_DEBLOCK
},
126 {"va", "avdeblock", 1, 2, 4, V_A_DEBLOCK
},
127 {"dr", "dering", 1, 5, 6, DERING
},
128 {"al", "autolevels", 0, 1, 2, LEVEL_FIX
},
129 {"lb", "linblenddeint", 1, 1, 4, LINEAR_BLEND_DEINT_FILTER
},
130 {"li", "linipoldeint", 1, 1, 4, LINEAR_IPOL_DEINT_FILTER
},
131 {"ci", "cubicipoldeint", 1, 1, 4, CUBIC_IPOL_DEINT_FILTER
},
132 {"md", "mediandeint", 1, 1, 4, MEDIAN_DEINT_FILTER
},
133 {"fd", "ffmpegdeint", 1, 1, 4, FFMPEG_DEINT_FILTER
},
134 {"l5", "lowpass5", 1, 1, 4, LOWPASS5_DEINT_FILTER
},
135 {"tn", "tmpnoise", 1, 7, 8, TEMP_NOISE_FILTER
},
136 {"fq", "forcequant", 1, 0, 0, FORCE_QUANT
},
137 {NULL
, NULL
,0,0,0,0} //End Marker
/*
 * Alias table stored as consecutive (name, expansion) string pairs.
 * pp_get_mode_by_name_and_quality() compares a filter name against entry
 * 2*i and, on a match, splices entry 2*i+1 into the string being parsed.
 * Its loop stops at a NULL name; that terminating entry and the braces of
 * the initializer are not visible in this listing.
 */
140 static const char *replaceTable
[]=
142 "default", "hb:a,vb:a,dr:a",
143 "de", "hb:a,vb:a,dr:a",
144 "fast", "h1:a,v1:a,dr:a",
145 "fa", "h1:a,v1:a,dr:a",
146 "ac", "ha:a:128:7,va:a,dr:a",
/*
 * x86-only helpers: prefetchnta(), prefetcht0(), prefetcht1() and
 * prefetcht2() each issue the instruction of the same name on the address
 * p through a volatile inline-asm statement.
 * The asm operand lists, the function braces and the closing #endif are
 * not visible in this listing.
 */
151 #if defined(ARCH_X86)
152 static inline void prefetchnta(void *p
)
154 asm volatile( "prefetchnta (%0)\n\t"
159 static inline void prefetcht0(void *p
)
161 asm volatile( "prefetcht0 (%0)\n\t"
166 static inline void prefetcht1(void *p
)
168 asm volatile( "prefetcht1 (%0)\n\t"
173 static inline void prefetcht2(void *p
)
175 asm volatile( "prefetcht2 (%0)\n\t"
181 /* The horizontal functions exist only in C because the MMX
182 * code is faster with vertical filters and transposing. */
185 * Check if the given 8x8 Block is mostly "flat"
/*
 * isHorizDC_C: flatness test on horizontally adjacent pixels.
 *
 * In each of BLOCK_SIZE loop iterations the seven neighbouring pairs
 * (src[k], src[k+1]), k = 0..6, are examined and numEq is incremented for
 * every pair whose difference lies in [-dcOffset, +dcOffset]. The single
 * unsigned comparison "(unsigned)(a - b + dcOffset) < 2*dcOffset + 1"
 * encodes that two-sided range check.
 * dcOffset is ((c->nonBQP * c->ppMode.baseDcDiff) >> 8) + 1.
 *
 * Returns nonzero when numEq exceeds c->ppMode.flatnessThreshold.
 *
 * NOTE(review): the declarations of numEq and y, the braces and whatever
 * advances src between iterations (presumably by stride) are not visible
 * in this listing.
 */
187 static inline int isHorizDC_C(uint8_t src
[], int stride
, PPContext
*c
)
191 const int dcOffset
= ((c
->nonBQP
*c
->ppMode
.baseDcDiff
)>>8) + 1;
192 const int dcThreshold
= dcOffset
*2 + 1;
194 for(y
=0; y
<BLOCK_SIZE
; y
++){
195 if(((unsigned)(src
[0] - src
[1] + dcOffset
)) < dcThreshold
) numEq
++;
196 if(((unsigned)(src
[1] - src
[2] + dcOffset
)) < dcThreshold
) numEq
++;
197 if(((unsigned)(src
[2] - src
[3] + dcOffset
)) < dcThreshold
) numEq
++;
198 if(((unsigned)(src
[3] - src
[4] + dcOffset
)) < dcThreshold
) numEq
++;
199 if(((unsigned)(src
[4] - src
[5] + dcOffset
)) < dcThreshold
) numEq
++;
200 if(((unsigned)(src
[5] - src
[6] + dcOffset
)) < dcThreshold
) numEq
++;
201 if(((unsigned)(src
[6] - src
[7] + dcOffset
)) < dcThreshold
) numEq
++;
204 return numEq
> c
->ppMode
.flatnessThreshold
;
208 * Check if the middle 8x8 Block in the given 8x16 block is flat
/*
 * isVertDC_C: flatness test on vertically adjacent pixels.
 *
 * src is first advanced by 4*stride. Then, for BLOCK_SIZE-1 loop
 * iterations, each of the eight columns k = 0..7 compares src[k] with the
 * pixel one row below (src[k+stride]) and increments numEq when the
 * difference lies in [-dcOffset, +dcOffset] (same unsigned range-check
 * trick and same dcOffset formula as the horizontal variant).
 *
 * Returns nonzero when numEq exceeds c->ppMode.flatnessThreshold.
 *
 * NOTE(review): the declarations of numEq and y, the braces and the
 * per-iteration row advance are not visible in this listing.
 */
210 static inline int isVertDC_C(uint8_t src
[], int stride
, PPContext
*c
)
214 const int dcOffset
= ((c
->nonBQP
*c
->ppMode
.baseDcDiff
)>>8) + 1;
215 const int dcThreshold
= dcOffset
*2 + 1;
217 src
+= stride
*4; // src points to begin of the 8x8 Block
218 for(y
=0; y
<BLOCK_SIZE
-1; y
++){
219 if(((unsigned)(src
[0] - src
[0+stride
] + dcOffset
)) < dcThreshold
) numEq
++;
220 if(((unsigned)(src
[1] - src
[1+stride
] + dcOffset
)) < dcThreshold
) numEq
++;
221 if(((unsigned)(src
[2] - src
[2+stride
] + dcOffset
)) < dcThreshold
) numEq
++;
222 if(((unsigned)(src
[3] - src
[3+stride
] + dcOffset
)) < dcThreshold
) numEq
++;
223 if(((unsigned)(src
[4] - src
[4+stride
] + dcOffset
)) < dcThreshold
) numEq
++;
224 if(((unsigned)(src
[5] - src
[5+stride
] + dcOffset
)) < dcThreshold
) numEq
++;
225 if(((unsigned)(src
[6] - src
[6+stride
] + dcOffset
)) < dcThreshold
) numEq
++;
226 if(((unsigned)(src
[7] - src
[7+stride
] + dcOffset
)) < dcThreshold
) numEq
++;
229 return numEq
> c
->ppMode
.flatnessThreshold
;
/*
 * isHorizMinMaxOk_C: rejects a block (returns 0) as soon as one sampled
 * pixel pair differs by more than 2*QP in either direction; the test
 * "(unsigned)(a - b + 2*QP) > 4*QP" is true exactly when a - b is outside
 * [-2*QP, +2*QP].
 *
 * Visible pairs: (src[0], src[5]), (src[2], src[7]), (src[4], src[1]),
 * (src[6], src[3]) and, separately, (src[0], src[7]).
 *
 * NOTE(review): the loops, the row advance, any conditional compilation
 * that selects between the two groups of tests and the final return value
 * are not visible in this listing -- consult the full source before
 * relying on the exact sampling pattern.
 */
232 static inline int isHorizMinMaxOk_C(uint8_t src
[], int stride
, int QP
)
237 if((unsigned)(src
[0] - src
[5] + 2*QP
) > 4*QP
) return 0;
239 if((unsigned)(src
[2] - src
[7] + 2*QP
) > 4*QP
) return 0;
241 if((unsigned)(src
[4] - src
[1] + 2*QP
) > 4*QP
) return 0;
243 if((unsigned)(src
[6] - src
[3] + 2*QP
) > 4*QP
) return 0;
248 if((unsigned)(src
[0] - src
[7] + 2*QP
) > 4*QP
) return 0;
/*
 * isVertMinMaxOk_C: vertical counterpart of the min/max check; returns 0
 * when a sampled difference exceeds 2*QP in magnitude.
 *
 * Three code fragments are visible:
 *  - a loop over x in steps of 4 that tests, for four neighbouring
 *    columns, the row pairs (0,5), (2,7), (4,1) and (6,3);
 *  - a loop over every x that tests row 1 against row 8 (stride<<3);
 *  - a loop over every x that reads v = src[x + y*stride] and returns 0
 *    when max - min > 2*QP.
 *
 * NOTE(review): these look like alternative implementations, presumably
 * selected by preprocessor conditionals that are not visible in this
 * listing; the declarations, the min/max bookkeeping and the final return
 * are also missing -- confirm against the full source.
 */
255 static inline int isVertMinMaxOk_C(uint8_t src
[], int stride
, int QP
)
261 for(x
=0; x
<BLOCK_SIZE
; x
+=4){
262 if((unsigned)(src
[ x
+ 0*stride
] - src
[ x
+ 5*stride
] + 2*QP
) > 4*QP
) return 0;
263 if((unsigned)(src
[1+x
+ 2*stride
] - src
[1+x
+ 7*stride
] + 2*QP
) > 4*QP
) return 0;
264 if((unsigned)(src
[2+x
+ 4*stride
] - src
[2+x
+ 1*stride
] + 2*QP
) > 4*QP
) return 0;
265 if((unsigned)(src
[3+x
+ 6*stride
] - src
[3+x
+ 3*stride
] + 2*QP
) > 4*QP
) return 0;
270 for(x
=0; x
<BLOCK_SIZE
; x
++){
271 if((unsigned)(src
[x
+ stride
] - src
[x
+ (stride
<<3)] + 2*QP
) > 4*QP
) return 0;
278 for(x
=0; x
<BLOCK_SIZE
; x
++){
283 int v
= src
[x
+ y
*stride
];
287 if(max
-min
> 2*QP
) return 0;
/*
 * horizClassify_C: classifies a block by first calling isHorizDC_C() and,
 * when that reports a flat block, isHorizMinMaxOk_C() with c->QP.
 * NOTE(review): the values returned for each outcome are not visible in
 * this listing.
 */
293 static inline int horizClassify_C(uint8_t src
[], int stride
, PPContext
*c
)
295 if( isHorizDC_C(src
, stride
, c
) ){
296 if( isHorizMinMaxOk_C(src
, stride
, c
->QP
) )
/*
 * vertClassify_C: classifies a block by first calling isVertDC_C() and,
 * when that reports a flat block, isVertMinMaxOk_C() with c->QP.
 * NOTE(review): the values returned for each outcome are not visible in
 * this listing.
 */
305 static inline int vertClassify_C(uint8_t src
[], int stride
, PPContext
*c
)
307 if( isVertDC_C(src
, stride
, c
) ){
308 if( isVertMinMaxOk_C(src
, stride
, c
->QP
) )
/*
 * doHorizDefFilter_C: default horizontal deblocking step, applied in each
 * of BLOCK_SIZE loop iterations around the boundary between dst[3] and
 * dst[4].
 *
 * Visible computation:
 *   middleEnergy = 5*(dst[4]-dst[3]) + 2*(dst[2]-dst[5])
 * and, only when |middleEnergy| < 8*c->QP:
 *   q           = (dst[3]-dst[4])/2
 *   leftEnergy  = 5*(dst[2]-dst[1]) + 2*(dst[0]-dst[3])
 *   rightEnergy = 5*(dst[6]-dst[5]) + 2*(dst[4]-dst[7])
 *   d           = |middleEnergy| - min(|leftEnergy|, |rightEnergy|)
 *   d          *= FFSIGN(-middleEnergy)
 *
 * NOTE(review): the scaling/clamping of d, the use of q, the writes back
 * to dst and the row advance are not visible in this listing.
 */
317 static inline void doHorizDefFilter_C(uint8_t dst
[], int stride
, PPContext
*c
)
320 for(y
=0; y
<BLOCK_SIZE
; y
++){
321 const int middleEnergy
= 5*(dst
[4] - dst
[3]) + 2*(dst
[2] - dst
[5]);
323 if(FFABS(middleEnergy
) < 8*c
->QP
){
324 const int q
=(dst
[3] - dst
[4])/2;
325 const int leftEnergy
= 5*(dst
[2] - dst
[1]) + 2*(dst
[0] - dst
[3]);
326 const int rightEnergy
= 5*(dst
[6] - dst
[5]) + 2*(dst
[4] - dst
[7]);
328 int d
= FFABS(middleEnergy
) - FFMIN( FFABS(leftEnergy
), FFABS(rightEnergy
) );
332 d
*= FFSIGN(-middleEnergy
);
353 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
354 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
/*
 * doHorizLowPass_C: smooths the eight pixels dst[0..7] in each of
 * BLOCK_SIZE loop iterations.
 *
 * first/last are the edge values used to pad the filter: the outer
 * neighbour (dst[-1] / dst[8]) when it differs from the edge pixel by less
 * than c->QP, otherwise the edge pixel itself (dst[0] / dst[7]).
 *
 * sums[] holds running sums built incrementally: each entry drops one
 * sample from the previous entry and adds the next one. The constant 4 is
 * folded into sums[0] and therefore into every entry. Each output adds two
 * entries plus twice the centre pixel, so the rounding term is 8 before
 * the >>4 (division by 16).
 *
 * NOTE(review): the declaration of sums, the loop's closing brace and the
 * row advance are not visible in this listing.
 */
356 static inline void doHorizLowPass_C(uint8_t dst
[], int stride
, PPContext
*c
)
359 for(y
=0; y
<BLOCK_SIZE
; y
++){
360 const int first
= FFABS(dst
[-1] - dst
[0]) < c
->QP
? dst
[-1] : dst
[0];
361 const int last
= FFABS(dst
[8] - dst
[7]) < c
->QP
? dst
[8] : dst
[7];
364 sums
[0] = 4*first
+ dst
[0] + dst
[1] + dst
[2] + 4;
365 sums
[1] = sums
[0] - first
+ dst
[3];
366 sums
[2] = sums
[1] - first
+ dst
[4];
367 sums
[3] = sums
[2] - first
+ dst
[5];
368 sums
[4] = sums
[3] - first
+ dst
[6];
369 sums
[5] = sums
[4] - dst
[0] + dst
[7];
370 sums
[6] = sums
[5] - dst
[1] + last
;
371 sums
[7] = sums
[6] - dst
[2] + last
;
372 sums
[8] = sums
[7] - dst
[3] + last
;
373 sums
[9] = sums
[8] - dst
[4] + last
;
375 dst
[0]= (sums
[0] + sums
[2] + 2*dst
[0])>>4;
376 dst
[1]= (sums
[1] + sums
[3] + 2*dst
[1])>>4;
377 dst
[2]= (sums
[2] + sums
[4] + 2*dst
[2])>>4;
378 dst
[3]= (sums
[3] + sums
[5] + 2*dst
[3])>>4;
379 dst
[4]= (sums
[4] + sums
[6] + 2*dst
[4])>>4;
380 dst
[5]= (sums
[5] + sums
[7] + 2*dst
[5])>>4;
381 dst
[6]= (sums
[6] + sums
[8] + 2*dst
[6])>>4;
382 dst
[7]= (sums
[7] + sums
[9] + 2*dst
[7])>>4;
389 * Experimental Filter 1 (Horizontal)
390 * will not damage linear gradients
391 * Flat blocks should look like they were passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
392 * can only smooth blocks at the expected locations (it cannot smooth them if they did move)
393 * MMX2 version does correct clipping, C version does not
394 * not identical with the vertical one
396 static inline void horizX1Filter(uint8_t *src
, int stride
, int QP
)
399 static uint64_t *lut
= NULL
;
403 lut
= av_malloc(256*8);
406 int v
= i
< 128 ? 2*i
: 2*(i
-256);
408 //Simulate 112242211 9-Tap filter
409 uint64_t a= (v/16) & 0xFF;
410 uint64_t b= (v/8) & 0xFF;
411 uint64_t c= (v/4) & 0xFF;
412 uint64_t d= (3*v/8) & 0xFF;
414 //Simulate piecewise linear interpolation
415 uint64_t a
= (v
/16) & 0xFF;
416 uint64_t b
= (v
*3/16) & 0xFF;
417 uint64_t c
= (v
*5/16) & 0xFF;
418 uint64_t d
= (7*v
/16) & 0xFF;
419 uint64_t A
= (0x100 - a
)&0xFF;
420 uint64_t B
= (0x100 - b
)&0xFF;
421 uint64_t C
= (0x100 - c
)&0xFF;
422 uint64_t D
= (0x100 - c
)&0xFF;
424 lut
[i
] = (a
<<56) | (b
<<48) | (c
<<40) | (d
<<32) |
425 (D
<<24) | (C
<<16) | (B
<<8) | (A
);
426 //lut[i] = (v<<32) | (v<<24);
430 for(y
=0; y
<BLOCK_SIZE
; y
++){
431 int a
= src
[1] - src
[2];
432 int b
= src
[3] - src
[4];
433 int c
= src
[5] - src
[6];
435 int d
= FFMAX(FFABS(b
) - (FFABS(a
) + FFABS(c
))/2, 0);
438 int v
= d
* FFSIGN(-b
);
452 * accurate deblock filter
/*
 * do_a_deblock_C: "accurate" deblocking filter working along an arbitrary
 * direction: consecutive samples of one line are `step` bytes apart.
 *
 * Visible stages:
 *  1. src is advanced by 4*step, and dcOffset/dcThreshold are derived from
 *     c->nonBQP and c->ppMode.baseDcDiff exactly as in isHorizDC_C().
 *  2. numEq counts how many of the nine neighbouring pairs from
 *     src[-1*step] to src[8*step] differ by at most dcOffset.
 *  3. When numEq > c->ppMode.flatnessThreshold, min and max are tracked
 *     over neighbouring pairs, and a low-pass filter with the same sums[]
 *     arithmetic as doHorizLowPass_C() is applied (edge padding is decided
 *     against the local QP).
 *  4. A second fragment computes middleEnergy/leftEnergy/rightEnergy and
 *     d as in doHorizDefFilter_C(), gated by |middleEnergy| < 8*QP.
 *
 * NOTE(review): presumably stage 4 is the else-branch of the flatness test
 * and the whole body runs once per line of the block. The loops, the
 * declaration and source of QP, the min/max acceptance test, the writes of
 * stage 4 and all closing braces are not visible in this listing.
 */
454 static av_always_inline
void do_a_deblock_C(uint8_t *src
, int step
, int stride
, PPContext
*c
){
457 const int dcOffset
= ((c
->nonBQP
*c
->ppMode
.baseDcDiff
)>>8) + 1;
458 const int dcThreshold
= dcOffset
*2 + 1;
460 src
+= step
*4; // src points to begin of the 8x8 Block
464 if(((unsigned)(src
[-1*step
] - src
[0*step
] + dcOffset
)) < dcThreshold
) numEq
++;
465 if(((unsigned)(src
[ 0*step
] - src
[1*step
] + dcOffset
)) < dcThreshold
) numEq
++;
466 if(((unsigned)(src
[ 1*step
] - src
[2*step
] + dcOffset
)) < dcThreshold
) numEq
++;
467 if(((unsigned)(src
[ 2*step
] - src
[3*step
] + dcOffset
)) < dcThreshold
) numEq
++;
468 if(((unsigned)(src
[ 3*step
] - src
[4*step
] + dcOffset
)) < dcThreshold
) numEq
++;
469 if(((unsigned)(src
[ 4*step
] - src
[5*step
] + dcOffset
)) < dcThreshold
) numEq
++;
470 if(((unsigned)(src
[ 5*step
] - src
[6*step
] + dcOffset
)) < dcThreshold
) numEq
++;
471 if(((unsigned)(src
[ 6*step
] - src
[7*step
] + dcOffset
)) < dcThreshold
) numEq
++;
472 if(((unsigned)(src
[ 7*step
] - src
[8*step
] + dcOffset
)) < dcThreshold
) numEq
++;
473 if(numEq
> c
->ppMode
.flatnessThreshold
){
476 if(src
[0] > src
[step
]){
484 if(src
[x
*step
] > src
[(x
+1)*step
]){
485 if(src
[x
*step
] > max
) max
= src
[ x
*step
];
486 if(src
[(x
+1)*step
] < min
) min
= src
[(x
+1)*step
];
488 if(src
[(x
+1)*step
] > max
) max
= src
[(x
+1)*step
];
489 if(src
[ x
*step
] < min
) min
= src
[ x
*step
];
493 const int first
= FFABS(src
[-1*step
] - src
[0]) < QP
? src
[-1*step
] : src
[0];
494 const int last
= FFABS(src
[8*step
] - src
[7*step
]) < QP
? src
[8*step
] : src
[7*step
];
497 sums
[0] = 4*first
+ src
[0*step
] + src
[1*step
] + src
[2*step
] + 4;
498 sums
[1] = sums
[0] - first
+ src
[3*step
];
499 sums
[2] = sums
[1] - first
+ src
[4*step
];
500 sums
[3] = sums
[2] - first
+ src
[5*step
];
501 sums
[4] = sums
[3] - first
+ src
[6*step
];
502 sums
[5] = sums
[4] - src
[0*step
] + src
[7*step
];
503 sums
[6] = sums
[5] - src
[1*step
] + last
;
504 sums
[7] = sums
[6] - src
[2*step
] + last
;
505 sums
[8] = sums
[7] - src
[3*step
] + last
;
506 sums
[9] = sums
[8] - src
[4*step
] + last
;
508 src
[0*step
]= (sums
[0] + sums
[2] + 2*src
[0*step
])>>4;
509 src
[1*step
]= (sums
[1] + sums
[3] + 2*src
[1*step
])>>4;
510 src
[2*step
]= (sums
[2] + sums
[4] + 2*src
[2*step
])>>4;
511 src
[3*step
]= (sums
[3] + sums
[5] + 2*src
[3*step
])>>4;
512 src
[4*step
]= (sums
[4] + sums
[6] + 2*src
[4*step
])>>4;
513 src
[5*step
]= (sums
[5] + sums
[7] + 2*src
[5*step
])>>4;
514 src
[6*step
]= (sums
[6] + sums
[8] + 2*src
[6*step
])>>4;
515 src
[7*step
]= (sums
[7] + sums
[9] + 2*src
[7*step
])>>4;
518 const int middleEnergy
= 5*(src
[4*step
] - src
[3*step
]) + 2*(src
[2*step
] - src
[5*step
]);
520 if(FFABS(middleEnergy
) < 8*QP
){
521 const int q
=(src
[3*step
] - src
[4*step
])/2;
522 const int leftEnergy
= 5*(src
[2*step
] - src
[1*step
]) + 2*(src
[0*step
] - src
[3*step
]);
523 const int rightEnergy
= 5*(src
[6*step
] - src
[5*step
]) + 2*(src
[4*step
] - src
[7*step
]);
525 int d
= FFABS(middleEnergy
) - FFMIN( FFABS(leftEnergy
), FFABS(rightEnergy
) );
529 d
*= FFSIGN(-middleEnergy
);
553 //Note: we have C, MMX, MMX2 and 3DNow versions; there is no combined 3DNow+MMX2 one
/*
 * Template instantiation. postprocess_template.c is included several
 * times, each time with RENAME() defined to append a different suffix
 * (_C, _altivec, _MMX, _MMX2, _3DNow), so one template yields the
 * per-CPU entry points (postProcess_C, postProcess_MMX2, ...) that
 * postProcess() dispatches to. The AltiVec variant additionally includes
 * postprocess_altivec_template.c first.
 * Which variants are built is decided by the HAVE_xxx / RUNTIME_CPUDETECT
 * conditions below.
 * NOTE(review): the COMPILE_C/COMPILE_MMX/COMPILE_MMX2 defines, the
 * #undef/#define lines that adjust HAVE_xxx and RENAME between inclusions
 * and most #endif lines are not visible in this listing.
 */
555 #if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT)
560 #define COMPILE_ALTIVEC
561 #endif //HAVE_ALTIVEC
563 #if defined(ARCH_X86)
565 #if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
569 #if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
573 #if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
574 #define COMPILE_3DNOW
576 #endif /* defined(ARCH_X86) */
587 #define RENAME(a) a ## _C
588 #include "postprocess_template.c"
591 #ifdef COMPILE_ALTIVEC
594 #define RENAME(a) a ## _altivec
595 #include "postprocess_altivec_template.c"
596 #include "postprocess_template.c"
605 #define RENAME(a) a ## _MMX
606 #include "postprocess_template.c"
615 #define RENAME(a) a ## _MMX2
616 #include "postprocess_template.c"
625 #define RENAME(a) a ## _3DNow
626 #include "postprocess_template.c"
629 // minor note: the HAVE_xyz is messed up after that line so do not use it.
/*
 * postProcess: processes one plane by forwarding to the CPU-specific
 * implementation generated from postprocess_template.c.
 *
 * vc and vm are cast to PPContext and PPMode, and the mode is copied by
 * value into c->ppMode before dispatching.
 *
 * With RUNTIME_CPUDETECT the choice is made from c->cpuCaps: on x86 MMX2
 * is preferred, then 3DNow, then MMX, with the C version as fallback; a
 * second visible chain picks AltiVec or C. Without RUNTIME_CPUDETECT the
 * variant is fixed at compile time by the HAVE_xxx macros.
 *
 * All variants receive the same arguments: source and destination plane
 * with their strides, plane width and height, the QP table with its
 * stride, the isColor flag and the context.
 *
 * NOTE(review): several else / #else / #endif lines and the braces are not
 * visible in this listing.
 */
631 static inline void postProcess(const uint8_t src
[], int srcStride
, uint8_t dst
[], int dstStride
, int width
, int height
,
632 const QP_STORE_T QPs
[], int QPStride
, int isColor
, pp_mode_t
*vm
, pp_context_t
*vc
)
634 PPContext
*c
= (PPContext
*)vc
;
635 PPMode
*ppMode
= (PPMode
*)vm
;
636 c
->ppMode
= *ppMode
; //FIXME
638 // Using ifs here as they are faster than function pointers although the
639 // difference would not be measurable here but it is much better because
640 // someone might exchange the CPU without restarting MPlayer ;)
641 #ifdef RUNTIME_CPUDETECT
642 #if defined(ARCH_X86)
643 // ordered per speed fastest first
644 if(c
->cpuCaps
& PP_CPU_CAPS_MMX2
)
645 postProcess_MMX2(src
, srcStride
, dst
, dstStride
, width
, height
, QPs
, QPStride
, isColor
, c
);
646 else if(c
->cpuCaps
& PP_CPU_CAPS_3DNOW
)
647 postProcess_3DNow(src
, srcStride
, dst
, dstStride
, width
, height
, QPs
, QPStride
, isColor
, c
);
648 else if(c
->cpuCaps
& PP_CPU_CAPS_MMX
)
649 postProcess_MMX(src
, srcStride
, dst
, dstStride
, width
, height
, QPs
, QPStride
, isColor
, c
);
651 postProcess_C(src
, srcStride
, dst
, dstStride
, width
, height
, QPs
, QPStride
, isColor
, c
);
654 if(c
->cpuCaps
& PP_CPU_CAPS_ALTIVEC
)
655 postProcess_altivec(src
, srcStride
, dst
, dstStride
, width
, height
, QPs
, QPStride
, isColor
, c
);
658 postProcess_C(src
, srcStride
, dst
, dstStride
, width
, height
, QPs
, QPStride
, isColor
, c
);
660 #else //RUNTIME_CPUDETECT
662 postProcess_MMX2(src
, srcStride
, dst
, dstStride
, width
, height
, QPs
, QPStride
, isColor
, c
);
663 #elif defined (HAVE_3DNOW)
664 postProcess_3DNow(src
, srcStride
, dst
, dstStride
, width
, height
, QPs
, QPStride
, isColor
, c
);
665 #elif defined (HAVE_MMX)
666 postProcess_MMX(src
, srcStride
, dst
, dstStride
, width
, height
, QPs
, QPStride
, isColor
, c
);
667 #elif defined (HAVE_ALTIVEC)
668 postProcess_altivec(src
, srcStride
, dst
, dstStride
, width
, height
, QPs
, QPStride
, isColor
, c
);
670 postProcess_C(src
, srcStride
, dst
, dstStride
, width
, height
, QPs
, QPStride
, isColor
, c
);
672 #endif //!RUNTIME_CPUDETECT
675 //static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
676 // QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
678 /* -pp Command line Help
/*
 * pp_help: user-visible description of the filter string syntax.
 * Declared as a pointer for library versions below 52 and as an array
 * otherwise (the #else / #endif between the two declarations is not
 * visible in this listing).
 *
 * The long names printed here must be the ones the parser accepts:
 * pp_get_mode_by_name_and_quality() matches names with strcmp() against
 * the `filters` table. Three entries advertised names that table does not
 * contain ("hadeblock", "vadeblock", "forceQuant"); they now read
 * "ahdeblock", "avdeblock" and "forcequant" as in the table.
 */
680 #if LIBPOSTPROC_VERSION_INT < (52<<16)
681 const char *const pp_help
=
683 const char pp_help
[] =
685 "Available postprocessing filters:\n"
687 "short long name short long option Description\n"
688 "* * a autoq CPU power dependent enabler\n"
689 " c chrom chrominance filtering enabled\n"
690 " y nochrom chrominance filtering disabled\n"
691 " n noluma luma filtering disabled\n"
692 "hb hdeblock (2 threshold) horizontal deblocking filter\n"
693 " 1. difference factor: default=32, higher -> more deblocking\n"
694 " 2. flatness threshold: default=39, lower -> more deblocking\n"
695 " the h & v deblocking filters share these\n"
696 " so you can't set different thresholds for h / v\n"
697 "vb vdeblock (2 threshold) vertical deblocking filter\n"
698 "ha ahdeblock (2 threshold) horizontal deblocking filter\n"
699 "va avdeblock (2 threshold) vertical deblocking filter\n"
700 "h1 x1hdeblock experimental h deblock filter 1\n"
701 "v1 x1vdeblock experimental v deblock filter 1\n"
702 "dr dering deringing filter\n"
703 "al autolevels automatic brightness / contrast\n"
704 " f fullyrange stretch luminance to (0..255)\n"
705 "lb linblenddeint linear blend deinterlacer\n"
706 "li linipoldeint linear interpolating deinterlace\n"
707 "ci cubicipoldeint cubic interpolating deinterlacer\n"
708 "md mediandeint median deinterlacer\n"
709 "fd ffmpegdeint ffmpeg deinterlacer\n"
710 "l5 lowpass5 FIR lowpass deinterlacer\n"
711 "de default hb:a,vb:a,dr:a\n"
712 "fa fast h1:a,v1:a,dr:a\n"
713 "ac ha:a:128:7,va:a,dr:a\n"
714 "tn tmpnoise (3 threshold) temporal noise reducer\n"
715 " 1. <= 2. <= 3. larger -> stronger filtering\n"
716 "fq forcequant <quantizer> force quantizer\n"
718 "<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
719 "long form example:\n"
720 "vdeblock:autoq/hdeblock:autoq/linblenddeint default,-vdeblock\n"
721 "short form example:\n"
722 "vb:a/hb:a/lb de,-vb\n"
728 pp_mode_t
*pp_get_mode_by_name_and_quality(const char *name
, int quality
)
730 char temp
[GET_MODE_BUFFER_SIZE
];
732 static const char filterDelimiters
[] = ",/";
733 static const char optionDelimiters
[] = ":";
734 struct PPMode
*ppMode
;
737 ppMode
= av_malloc(sizeof(PPMode
));
740 ppMode
->chromMode
= 0;
741 ppMode
->maxTmpNoise
[0]= 700;
742 ppMode
->maxTmpNoise
[1]= 1500;
743 ppMode
->maxTmpNoise
[2]= 3000;
744 ppMode
->maxAllowedY
= 234;
745 ppMode
->minAllowedY
= 16;
746 ppMode
->baseDcDiff
= 256/8;
747 ppMode
->flatnessThreshold
= 56-16-1;
748 ppMode
->maxClippedThreshold
= 0.01;
751 strncpy(temp
, name
, GET_MODE_BUFFER_SIZE
);
753 av_log(NULL
, AV_LOG_DEBUG
, "pp: %s\n", name
);
757 int q
= 1000000; //PP_QUALITY_MAX;
761 char *options
[OPTIONS_ARRAY_SIZE
];
764 int numOfUnknownOptions
=0;
765 int enable
=1; //does the user want us to enabled or disabled the filter
767 filterToken
= strtok(p
, filterDelimiters
);
768 if(filterToken
== NULL
) break;
769 p
+= strlen(filterToken
) + 1; // p points to next filterToken
770 filterName
= strtok(filterToken
, optionDelimiters
);
771 av_log(NULL
, AV_LOG_DEBUG
, "pp: %s::%s\n", filterToken
, filterName
);
773 if(*filterName
== '-'){
778 for(;;){ //for all options
779 option
= strtok(NULL
, optionDelimiters
);
780 if(option
== NULL
) break;
782 av_log(NULL
, AV_LOG_DEBUG
, "pp: option: %s\n", option
);
783 if(!strcmp("autoq", option
) || !strcmp("a", option
)) q
= quality
;
784 else if(!strcmp("nochrom", option
) || !strcmp("y", option
)) chrom
=0;
785 else if(!strcmp("chrom", option
) || !strcmp("c", option
)) chrom
=1;
786 else if(!strcmp("noluma", option
) || !strcmp("n", option
)) luma
=0;
788 options
[numOfUnknownOptions
] = option
;
789 numOfUnknownOptions
++;
791 if(numOfUnknownOptions
>= OPTIONS_ARRAY_SIZE
-1) break;
793 options
[numOfUnknownOptions
] = NULL
;
795 /* replace stuff from the replace Table */
796 for(i
=0; replaceTable
[2*i
]!=NULL
; i
++){
797 if(!strcmp(replaceTable
[2*i
], filterName
)){
798 int newlen
= strlen(replaceTable
[2*i
+ 1]);
802 if(p
==NULL
) p
= temp
, *p
=0; //last filter
803 else p
--, *p
=','; //not last filter
806 spaceLeft
= p
- temp
+ plen
;
807 if(spaceLeft
+ newlen
>= GET_MODE_BUFFER_SIZE
){
811 memmove(p
+ newlen
, p
, plen
+1);
812 memcpy(p
, replaceTable
[2*i
+ 1], newlen
);
817 for(i
=0; filters
[i
].shortName
!=NULL
; i
++){
818 if( !strcmp(filters
[i
].longName
, filterName
)
819 || !strcmp(filters
[i
].shortName
, filterName
)){
820 ppMode
->lumMode
&= ~filters
[i
].mask
;
821 ppMode
->chromMode
&= ~filters
[i
].mask
;
824 if(!enable
) break; // user wants to disable it
826 if(q
>= filters
[i
].minLumQuality
&& luma
)
827 ppMode
->lumMode
|= filters
[i
].mask
;
828 if(chrom
==1 || (chrom
==-1 && filters
[i
].chromDefault
))
829 if(q
>= filters
[i
].minChromQuality
)
830 ppMode
->chromMode
|= filters
[i
].mask
;
832 if(filters
[i
].mask
== LEVEL_FIX
){
834 ppMode
->minAllowedY
= 16;
835 ppMode
->maxAllowedY
= 234;
836 for(o
=0; options
[o
]!=NULL
; o
++){
837 if( !strcmp(options
[o
],"fullyrange")
838 ||!strcmp(options
[o
],"f")){
839 ppMode
->minAllowedY
= 0;
840 ppMode
->maxAllowedY
= 255;
841 numOfUnknownOptions
--;
845 else if(filters
[i
].mask
== TEMP_NOISE_FILTER
)
850 for(o
=0; options
[o
]!=NULL
; o
++){
852 ppMode
->maxTmpNoise
[numOfNoises
]=
853 strtol(options
[o
], &tail
, 0);
854 if(tail
!=options
[o
]){
856 numOfUnknownOptions
--;
857 if(numOfNoises
>= 3) break;
861 else if(filters
[i
].mask
== V_DEBLOCK
|| filters
[i
].mask
== H_DEBLOCK
862 || filters
[i
].mask
== V_A_DEBLOCK
|| filters
[i
].mask
== H_A_DEBLOCK
){
865 for(o
=0; options
[o
]!=NULL
&& o
<2; o
++){
867 int val
= strtol(options
[o
], &tail
, 0);
868 if(tail
==options
[o
]) break;
870 numOfUnknownOptions
--;
871 if(o
==0) ppMode
->baseDcDiff
= val
;
872 else ppMode
->flatnessThreshold
= val
;
875 else if(filters
[i
].mask
== FORCE_QUANT
){
877 ppMode
->forcedQuant
= 15;
879 for(o
=0; options
[o
]!=NULL
&& o
<1; o
++){
881 int val
= strtol(options
[o
], &tail
, 0);
882 if(tail
==options
[o
]) break;
884 numOfUnknownOptions
--;
885 ppMode
->forcedQuant
= val
;
890 if(!filterNameOk
) ppMode
->error
++;
891 ppMode
->error
+= numOfUnknownOptions
;
894 av_log(NULL
, AV_LOG_DEBUG
, "pp: lumMode=%X, chromMode=%X\n", ppMode
->lumMode
, ppMode
->chromMode
);
896 av_log(NULL
, AV_LOG_ERROR
, "%d errors in postprocess string \"%s\"\n", ppMode
->error
, name
);
/*
 * pp_free_mode: public counterpart to pp_get_mode_by_name_and_quality().
 * NOTE(review): the body is not visible in this listing; presumably it
 * releases `mode` -- confirm against the full source.
 */
903 void pp_free_mode(pp_mode_t
*mode
){
/*
 * reallocAlign: points *p at a new zero-initialised buffer of `size`
 * bytes obtained from av_mallocz().
 * `alignment` is not referenced by any visible line.
 * NOTE(review): one line between the signature and the allocation is not
 * visible in this listing; presumably it frees the previous *p -- confirm.
 * The result of av_mallocz() is not checked here.
 */
907 static void reallocAlign(void **p
, int alignment
, int size
){
909 *p
= av_mallocz(size
);
/*
 * reallocBuffers: (re)allocates every work buffer of the context for the
 * given picture size and strides, using reallocAlign().
 *
 * mbWidth and mbHeight are the picture dimensions in 16-pixel units,
 * rounded up. Visible buffer sizes:
 *   tempDst, tempSrc         stride*24 bytes
 *   tempBlocks               2*16*8 bytes
 *   yHistogram               256 uint64_t entries, each then set to
 *                            width*height/64*15/256
 *   tempBlurred[i]           stride*mbHeight*16 + 17*1024 bytes
 *   tempBlurredPast[i]       256*((height+7)&~7)/2 + 17*1024 bytes
 *   deintTemp                2*width+32 bytes
 *   nonBQPTable, stdQPTable  qpStride*mbHeight QP_STORE_T entries
 *   forcedQPTable            mbWidth QP_STORE_T entries
 * c->qpStride is updated to the new qpStride.
 *
 * NOTE(review): the loops over i (and their bounds), the braces and any
 * assignment of c->stride are not visible in this listing. Size
 * expressions are plain int arithmetic and allocation results are not
 * checked in the visible lines.
 */
912 static void reallocBuffers(PPContext
*c
, int width
, int height
, int stride
, int qpStride
){
913 int mbWidth
= (width
+15)>>4;
914 int mbHeight
= (height
+15)>>4;
918 c
->qpStride
= qpStride
;
920 reallocAlign((void **)&c
->tempDst
, 8, stride
*24);
921 reallocAlign((void **)&c
->tempSrc
, 8, stride
*24);
922 reallocAlign((void **)&c
->tempBlocks
, 8, 2*16*8);
923 reallocAlign((void **)&c
->yHistogram
, 8, 256*sizeof(uint64_t));
925 c
->yHistogram
[i
]= width
*height
/64*15/256;
928 //Note: The +17*1024 is just there so i do not have to worry about r/w over the end.
929 reallocAlign((void **)&c
->tempBlurred
[i
], 8, stride
*mbHeight
*16 + 17*1024);
930 reallocAlign((void **)&c
->tempBlurredPast
[i
], 8, 256*((height
+7)&(~7))/2 + 17*1024);//FIXME size
933 reallocAlign((void **)&c
->deintTemp
, 8, 2*width
+32);
934 reallocAlign((void **)&c
->nonBQPTable
, 8, qpStride
*mbHeight
*sizeof(QP_STORE_T
));
935 reallocAlign((void **)&c
->stdQPTable
, 8, qpStride
*mbHeight
*sizeof(QP_STORE_T
));
936 reallocAlign((void **)&c
->forcedQPTable
, 8, mbWidth
*sizeof(QP_STORE_T
));
/*
 * context_to_name: name callback stored in av_codec_context_class below.
 * NOTE(review): its body is not visible in this listing.
 *
 * av_codec_context_class: AVClass instance initialised with the class
 * name "Postproc", the callback above and NULL; pp_get_context() stores
 * its address in the context's av_class field.
 */
939 static const char * context_to_name(void * ptr
) {
943 static const AVClass av_codec_context_class
= { "Postproc", context_to_name
, NULL
};
/*
 * pp_get_context: allocates and initialises a postprocessing context for
 * pictures of the given size.
 *
 * width, height - picture dimensions; the initial stride is width rounded
 *                 up to a multiple of 16 and the initial QP stride is
 *                 (width+15)/16 + 2. Both are only assumptions here;
 *                 pp_postprocess() reallocates when larger strides arrive.
 * cpuCaps       - capability/format flags. When PP_FORMAT is set, the
 *                 horizontal chroma subsampling shift is taken from bits
 *                 0-1 and the vertical one from bits 4-5; the other two
 *                 visible assignments set both shifts to 1.
 *
 * Fix relative to the listing this was taken from: the av_malloc() result
 * is checked before memset(); on allocation failure NULL is returned
 * instead of writing through a null pointer.
 *
 * NOTE(review): several lines are not visible in this listing (the
 * else/brace lines, the final return, and whatever stores cpuCaps into the
 * context that postProcess() later reads as c->cpuCaps).
 */
945 pp_context_t
*pp_get_context(int width
, int height
, int cpuCaps
){
946 PPContext
*c
= av_malloc(sizeof(PPContext
));
947 int stride
= (width
+15)&(~15); //assumed / will realloc if needed
948 int qpStride
= (width
+15)/16 + 2; //assumed / will realloc if needed
if(!c) return NULL; /* allocation failed: nothing to initialise */
950 memset(c
, 0, sizeof(PPContext
));
951 c
->av_class
= &av_codec_context_class
;
953 if(cpuCaps
&PP_FORMAT
){
954 c
->hChromaSubSample
= cpuCaps
&0x3;
955 c
->vChromaSubSample
= (cpuCaps
>>4)&0x3;
957 c
->hChromaSubSample
= 1;
958 c
->vChromaSubSample
= 1;
961 reallocBuffers(c
, width
, height
, stride
, qpStride
);
/*
 * pp_free_context: releases the buffers owned by a context created with
 * pp_get_context().
 *
 * Visible frees: the three tempBlurred and three tempBlurredPast planes,
 * tempBlocks, yHistogram, deintTemp, stdQPTable, nonBQPTable and
 * forcedQPTable. The structure is then zeroed with memset().
 *
 * vc is dereferenced without a NULL check in the visible lines.
 * NOTE(review): the lines releasing tempDst/tempSrc and the context
 * itself are not visible in this listing -- confirm they exist in the
 * full source.
 */
968 void pp_free_context(void *vc
){
969 PPContext
*c
= (PPContext
*)vc
;
972 for(i
=0; i
<3; i
++) av_free(c
->tempBlurred
[i
]);
973 for(i
=0; i
<3; i
++) av_free(c
->tempBlurredPast
[i
]);
975 av_free(c
->tempBlocks
);
976 av_free(c
->yHistogram
);
979 av_free(c
->deintTemp
);
980 av_free(c
->stdQPTable
);
981 av_free(c
->nonBQPTable
);
982 av_free(c
->forcedQPTable
);
984 memset(c
, 0, sizeof(PPContext
));
/*
 * pp_postprocess: public entry point; postprocesses one picture made of
 * three planes (index 0 is processed with isColor 0, planes 1 and 2 with
 * isColor 1 and 2).
 *
 * Visible steps:
 *  1. Buffers are reallocated when |srcStride[0]| or |dstStride[0]|
 *     exceeds c->stride, or |QPStride| exceeds c->qpStride.
 *  2. When QP_store is NULL or the mode has FORCE_QUANT set, the QP table
 *     is replaced by c->forcedQPTable with stride 0, filled with
 *     mode->forcedQuant (FORCE_QUANT) or with 1.
 *  3. When pict_type has PP_PICT_TYPE_QP2 set, every QP is halved into
 *     c->stdQPTable (four bytes at a time through a uint32_t view with
 *     mask 0x7F7F7F7F, then the remaining bytes one by one) and that
 *     table is used from then on.
 *  4. A nested loop prints the QP table with av_log() at INFO level.
 *  5. When (pict_type & 7) != 3, the QPs masked with 0x3F are saved into
 *     c->nonBQPTable, either as one flat run of mbHeight*QPStride entries
 *     or row by row using absQPStride.
 *  6. Plane 0 is processed at full size. width and height are then
 *     shifted right by the chroma subsampling shifts. Planes 1 and 2 are
 *     processed when mode->chromMode is nonzero; otherwise they are
 *     copied, with linecpy() when source and destination strides match
 *     and with a per-row memcpy() of `width` bytes when they differ.
 *
 * NOTE(review): the declarations of i, j, x, y, the conditions choosing
 * between the two variants in step 5, any guard around step 4, the else
 * lines and the braces are not visible in this listing. The uint32_t
 * views in steps 3 and 5 assume the QP tables are suitably aligned for
 * 32-bit access -- confirm for caller-supplied QP_store.
 */
989 void pp_postprocess(const uint8_t * src
[3], const int srcStride
[3],
990 uint8_t * dst
[3], const int dstStride
[3],
991 int width
, int height
,
992 const QP_STORE_T
*QP_store
, int QPStride
,
993 pp_mode_t
*vm
, void *vc
, int pict_type
)
995 int mbWidth
= (width
+15)>>4;
996 int mbHeight
= (height
+15)>>4;
997 PPMode
*mode
= (PPMode
*)vm
;
998 PPContext
*c
= (PPContext
*)vc
;
999 int minStride
= FFMAX(FFABS(srcStride
[0]), FFABS(dstStride
[0]));
1000 int absQPStride
= FFABS(QPStride
);
1002 // c->stride and c->QPStride are always positive
1003 if(c
->stride
< minStride
|| c
->qpStride
< absQPStride
)
1004 reallocBuffers(c
, width
, height
,
1005 FFMAX(minStride
, c
->stride
),
1006 FFMAX(c
->qpStride
, absQPStride
));
1008 if(QP_store
==NULL
|| (mode
->lumMode
& FORCE_QUANT
)){
1010 QP_store
= c
->forcedQPTable
;
1011 absQPStride
= QPStride
= 0;
1012 if(mode
->lumMode
& FORCE_QUANT
)
1013 for(i
=0; i
<mbWidth
; i
++) c
->forcedQPTable
[i
]= mode
->forcedQuant
;
1015 for(i
=0; i
<mbWidth
; i
++) c
->forcedQPTable
[i
]= 1;
1018 if(pict_type
& PP_PICT_TYPE_QP2
){
1020 const int count
= mbHeight
* absQPStride
;
1021 for(i
=0; i
<(count
>>2); i
++){
1022 ((uint32_t*)c
->stdQPTable
)[i
] = (((const uint32_t*)QP_store
)[i
]>>1) & 0x7F7F7F7F;
1024 for(i
<<=2; i
<count
; i
++){
1025 c
->stdQPTable
[i
] = QP_store
[i
]>>1;
1027 QP_store
= c
->stdQPTable
;
1028 QPStride
= absQPStride
;
1033 for(y
=0; y
<mbHeight
; y
++){
1034 for(x
=0; x
<mbWidth
; x
++){
1035 av_log(c
, AV_LOG_INFO
, "%2d ", QP_store
[x
+ y
*QPStride
]);
1037 av_log(c
, AV_LOG_INFO
, "\n");
1039 av_log(c
, AV_LOG_INFO
, "\n");
1042 if((pict_type
&7)!=3){
1045 const int count
= mbHeight
* QPStride
;
1046 for(i
=0; i
<(count
>>2); i
++){
1047 ((uint32_t*)c
->nonBQPTable
)[i
] = ((const uint32_t*)QP_store
)[i
] & 0x3F3F3F3F;
1049 for(i
<<=2; i
<count
; i
++){
1050 c
->nonBQPTable
[i
] = QP_store
[i
] & 0x3F;
1054 for(i
=0; i
<mbHeight
; i
++) {
1055 for(j
=0; j
<absQPStride
; j
++) {
1056 c
->nonBQPTable
[i
*absQPStride
+j
] = QP_store
[i
*QPStride
+j
] & 0x3F;
1062 av_log(c
, AV_LOG_DEBUG
, "using npp filters 0x%X/0x%X\n",
1063 mode
->lumMode
, mode
->chromMode
);
1065 postProcess(src
[0], srcStride
[0], dst
[0], dstStride
[0],
1066 width
, height
, QP_store
, QPStride
, 0, mode
, c
);
1068 width
= (width
)>>c
->hChromaSubSample
;
1069 height
= (height
)>>c
->vChromaSubSample
;
1071 if(mode
->chromMode
){
1072 postProcess(src
[1], srcStride
[1], dst
[1], dstStride
[1],
1073 width
, height
, QP_store
, QPStride
, 1, mode
, c
);
1074 postProcess(src
[2], srcStride
[2], dst
[2], dstStride
[2],
1075 width
, height
, QP_store
, QPStride
, 2, mode
, c
);
1077 else if(srcStride
[1] == dstStride
[1] && srcStride
[2] == dstStride
[2]){
1078 linecpy(dst
[1], src
[1], height
, srcStride
[1]);
1079 linecpy(dst
[2], src
[2], height
, srcStride
[2]);
1082 for(y
=0; y
<height
; y
++){
1083 memcpy(&(dst
[1][y
*dstStride
[1]]), &(src
[1][y
*srcStride
[1]]), width
);
1084 memcpy(&(dst
[2][y
*dstStride
[2]]), &(src
[2][y
*srcStride
[2]]), width
);