2 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 #define _SVID_SOURCE //needed for MAP_ANONYMOUS
22 #define _DARWIN_C_SOURCE // needed for MAP_ANON
31 #if defined(MAP_ANON) && !defined(MAP_ANONYMOUS)
32 #define MAP_ANONYMOUS MAP_ANON
36 #define WIN32_LEAN_AND_MEAN
40 #include "swscale_internal.h"
42 #include "libavutil/intreadwrite.h"
43 #include "libavutil/x86_cpu.h"
44 #include "libavutil/avutil.h"
45 #include "libavutil/bswap.h"
46 #include "libavutil/opt.h"
47 #include "libavutil/pixdesc.h"
49 unsigned swscale_version(void)
51 return LIBSWSCALE_VERSION_INT
;
54 const char *swscale_configuration(void)
56 return FFMPEG_CONFIGURATION
;
59 const char *swscale_license(void)
61 #define LICENSE_PREFIX "libswscale license: "
62 return LICENSE_PREFIX FFMPEG_LICENSE
+ sizeof(LICENSE_PREFIX
) - 1;
#define RET 0xC3 // near return opcode for x86
/*
 * Evaluates to nonzero when x is one of the pixel formats accepted as
 * scaler input. x is evaluated once per comparison, so it must be free of
 * side effects.
 */
#define isSupportedIn(x)    (       \
           (x)==PIX_FMT_YUV420P     \
        || (x)==PIX_FMT_YUVA420P    \
        || (x)==PIX_FMT_YUYV422     \
        || (x)==PIX_FMT_UYVY422     \
        || (x)==PIX_FMT_RGB48BE     \
        || (x)==PIX_FMT_RGB48LE     \
        || (x)==PIX_FMT_RGB32       \
        || (x)==PIX_FMT_RGB32_1     \
        || (x)==PIX_FMT_BGR24       \
        || (x)==PIX_FMT_BGR565      \
        || (x)==PIX_FMT_BGR555      \
        || (x)==PIX_FMT_BGR32       \
        || (x)==PIX_FMT_BGR32_1     \
        || (x)==PIX_FMT_RGB24       \
        || (x)==PIX_FMT_RGB565      \
        || (x)==PIX_FMT_RGB555      \
        || (x)==PIX_FMT_GRAY8       \
        || (x)==PIX_FMT_Y400A       \
        || (x)==PIX_FMT_YUV410P     \
        || (x)==PIX_FMT_YUV440P     \
        || (x)==PIX_FMT_NV12        \
        || (x)==PIX_FMT_NV21        \
        || (x)==PIX_FMT_GRAY16BE    \
        || (x)==PIX_FMT_GRAY16LE    \
        || (x)==PIX_FMT_YUV444P     \
        || (x)==PIX_FMT_YUV422P     \
        || (x)==PIX_FMT_YUV411P     \
        || (x)==PIX_FMT_YUVJ420P    \
        || (x)==PIX_FMT_YUVJ422P    \
        || (x)==PIX_FMT_YUVJ440P    \
        || (x)==PIX_FMT_YUVJ444P    \
        || (x)==PIX_FMT_PAL8        \
        || (x)==PIX_FMT_BGR8        \
        || (x)==PIX_FMT_RGB8        \
        || (x)==PIX_FMT_BGR4_BYTE   \
        || (x)==PIX_FMT_RGB4_BYTE   \
        || (x)==PIX_FMT_MONOWHITE   \
        || (x)==PIX_FMT_MONOBLACK   \
        || (x)==PIX_FMT_YUV420P16LE \
        || (x)==PIX_FMT_YUV422P16LE \
        || (x)==PIX_FMT_YUV444P16LE \
        || (x)==PIX_FMT_YUV420P16BE \
        || (x)==PIX_FMT_YUV422P16BE \
        || (x)==PIX_FMT_YUV444P16BE \
    )
115 int sws_isSupportedInput(enum PixelFormat pix_fmt
)
117 return isSupportedIn(pix_fmt
);
/*
 * Evaluates to nonzero when x equals one of the pixel formats listed below;
 * used to validate the destination format of a scaler context.
 * NOTE(review): the condition list looks incomplete here -- it has no closing
 * parenthesis and no RGB-family entry, although sws_init_context() later
 * queries isAnyRGB() on an accepted destination format. Confirm the full list
 * against the upstream file before relying on it.
 */
120 #define isSupportedOut(x) ( \
121 (x)==PIX_FMT_YUV420P \
122 || (x)==PIX_FMT_YUVA420P \
123 || (x)==PIX_FMT_YUYV422 \
124 || (x)==PIX_FMT_UYVY422 \
125 || (x)==PIX_FMT_YUV444P \
126 || (x)==PIX_FMT_YUV422P \
127 || (x)==PIX_FMT_YUV411P \
128 || (x)==PIX_FMT_YUVJ420P \
129 || (x)==PIX_FMT_YUVJ422P \
130 || (x)==PIX_FMT_YUVJ440P \
131 || (x)==PIX_FMT_YUVJ444P \
133 || (x)==PIX_FMT_NV12 \
134 || (x)==PIX_FMT_NV21 \
135 || (x)==PIX_FMT_GRAY16BE \
136 || (x)==PIX_FMT_GRAY16LE \
137 || (x)==PIX_FMT_GRAY8 \
138 || (x)==PIX_FMT_YUV410P \
139 || (x)==PIX_FMT_YUV440P \
140 || (x)==PIX_FMT_YUV420P16LE \
141 || (x)==PIX_FMT_YUV422P16LE \
142 || (x)==PIX_FMT_YUV444P16LE \
143 || (x)==PIX_FMT_YUV420P16BE \
144 || (x)==PIX_FMT_YUV422P16BE \
145 || (x)==PIX_FMT_YUV444P16BE \
148 int sws_isSupportedOutput(enum PixelFormat pix_fmt
)
150 return isSupportedOut(pix_fmt
);
/* Coefficient table defined in another translation unit: 8 rows of 4 int32_t. */
extern const int32_t ff_yuv2rgb_coeffs[8][4];
155 const char *sws_format_name(enum PixelFormat format
)
157 if ((unsigned)format
< PIX_FMT_NB
&& av_pix_fmt_descriptors
[format
].name
)
158 return av_pix_fmt_descriptors
[format
].name
;
160 return "Unknown format";
/*
 * Evaluate a cubic in Horner form: for dist <= 1.0 the result is
 * ((d*dist + c)*dist + b)*dist + a. For larger dist the function calls
 * itself, passing 0.0 as the first argument.
 * NOTE(review): the remaining arguments of the recursive call are not present
 * in this copy of the file -- restore them from upstream before building.
 */
163 static double getSplineCoeff(double a
, double b
, double c
, double d
, double dist
)
165 // printf("%f %f %f %f %f\n", a,b,c,d,dist);
166 if (dist
<=1.0) return ((d
*dist
+ c
)*dist
+ b
)*dist
+a
;
167 else return getSplineCoeff( 0.0,
174 static int initFilter(int16_t **outFilter
, int16_t **filterPos
, int *outFilterSize
, int xInc
,
175 int srcW
, int dstW
, int filterAlign
, int one
, int flags
,
176 SwsVector
*srcFilter
, SwsVector
*dstFilter
, double param
[2])
182 int64_t *filter
=NULL
;
183 int64_t *filter2
=NULL
;
184 const int64_t fone
= 1LL<<54;
187 if (flags
& SWS_CPU_CAPS_MMX
)
188 __asm__
volatile("emms\n\t"::: "memory"); //FIXME this should not be required but it IS (even for non-MMX versions)
191 // NOTE: the +1 is for the MMX scaler which reads over the end
192 FF_ALLOC_OR_GOTO(NULL
, *filterPos
, (dstW
+1)*sizeof(int16_t), fail
);
194 if (FFABS(xInc
- 0x10000) <10) { // unscaled
197 FF_ALLOCZ_OR_GOTO(NULL
, filter
, dstW
*sizeof(*filter
)*filterSize
, fail
);
199 for (i
=0; i
<dstW
; i
++) {
200 filter
[i
*filterSize
]= fone
;
204 } else if (flags
&SWS_POINT
) { // lame looking point sampling mode
208 FF_ALLOC_OR_GOTO(NULL
, filter
, dstW
*sizeof(*filter
)*filterSize
, fail
);
210 xDstInSrc
= xInc
/2 - 0x8000;
211 for (i
=0; i
<dstW
; i
++) {
212 int xx
= (xDstInSrc
- ((filterSize
-1)<<15) + (1<<15))>>16;
218 } else if ((xInc
<= (1<<16) && (flags
&SWS_AREA
)) || (flags
&SWS_FAST_BILINEAR
)) { // bilinear upscale
222 FF_ALLOC_OR_GOTO(NULL
, filter
, dstW
*sizeof(*filter
)*filterSize
, fail
);
224 xDstInSrc
= xInc
/2 - 0x8000;
225 for (i
=0; i
<dstW
; i
++) {
226 int xx
= (xDstInSrc
- ((filterSize
-1)<<15) + (1<<15))>>16;
230 //bilinear upscale / linear interpolate / area averaging
231 for (j
=0; j
<filterSize
; j
++) {
232 int64_t coeff
= fone
- FFABS((xx
<<16) - xDstInSrc
)*(fone
>>16);
233 if (coeff
<0) coeff
=0;
234 filter
[i
*filterSize
+ j
]= coeff
;
243 if (flags
&SWS_BICUBIC
) sizeFactor
= 4;
244 else if (flags
&SWS_X
) sizeFactor
= 8;
245 else if (flags
&SWS_AREA
) sizeFactor
= 1; //downscale only, for upscale it is bilinear
246 else if (flags
&SWS_GAUSS
) sizeFactor
= 8; // infinite ;)
247 else if (flags
&SWS_LANCZOS
) sizeFactor
= param
[0] != SWS_PARAM_DEFAULT
? ceil(2*param
[0]) : 6;
248 else if (flags
&SWS_SINC
) sizeFactor
= 20; // infinite ;)
249 else if (flags
&SWS_SPLINE
) sizeFactor
= 20; // infinite ;)
250 else if (flags
&SWS_BILINEAR
) sizeFactor
= 2;
252 sizeFactor
= 0; //GCC warning killer
256 if (xInc
<= 1<<16) filterSize
= 1 + sizeFactor
; // upscale
257 else filterSize
= 1 + (sizeFactor
*srcW
+ dstW
- 1)/ dstW
;
259 if (filterSize
> srcW
-2) filterSize
=srcW
-2;
261 FF_ALLOC_OR_GOTO(NULL
, filter
, dstW
*sizeof(*filter
)*filterSize
, fail
);
263 xDstInSrc
= xInc
- 0x10000;
264 for (i
=0; i
<dstW
; i
++) {
265 int xx
= (xDstInSrc
- ((filterSize
-2)<<16)) / (1<<17);
268 for (j
=0; j
<filterSize
; j
++) {
269 int64_t d
= ((int64_t)FFABS((xx
<<17) - xDstInSrc
))<<13;
275 floatd
= d
* (1.0/(1<<30));
277 if (flags
& SWS_BICUBIC
) {
278 int64_t B
= (param
[0] != SWS_PARAM_DEFAULT
? param
[0] : 0) * (1<<24);
279 int64_t C
= (param
[1] != SWS_PARAM_DEFAULT
? param
[1] : 0.6) * (1<<24);
280 int64_t dd
= ( d
*d
)>>30;
281 int64_t ddd
= (dd
*d
)>>30;
284 coeff
= (12*(1<<24)-9*B
-6*C
)*ddd
+ (-18*(1<<24)+12*B
+6*C
)*dd
+ (6*(1<<24)-2*B
)*(1<<30);
285 else if (d
< 1LL<<31)
286 coeff
= (-B
-6*C
)*ddd
+ (6*B
+30*C
)*dd
+ (-12*B
-48*C
)*d
+ (8*B
+24*C
)*(1<<30);
289 coeff
*= fone
>>(30+24);
291 /* else if (flags & SWS_X) {
292 double p= param ? param*0.01 : 0.3;
293 coeff = d ? sin(d*M_PI)/(d*M_PI) : 1.0;
294 coeff*= pow(2.0, - p*d*d);
296 else if (flags
& SWS_X
) {
297 double A
= param
[0] != SWS_PARAM_DEFAULT
? param
[0] : 1.0;
301 c
= cos(floatd
*M_PI
);
304 if (c
<0.0) c
= -pow(-c
, A
);
306 coeff
= (c
*0.5 + 0.5)*fone
;
307 } else if (flags
& SWS_AREA
) {
308 int64_t d2
= d
- (1<<29);
309 if (d2
*xInc
< -(1LL<<(29+16))) coeff
= 1.0 * (1LL<<(30+16));
310 else if (d2
*xInc
< (1LL<<(29+16))) coeff
= -d2
*xInc
+ (1LL<<(29+16));
312 coeff
*= fone
>>(30+16);
313 } else if (flags
& SWS_GAUSS
) {
314 double p
= param
[0] != SWS_PARAM_DEFAULT
? param
[0] : 3.0;
315 coeff
= (pow(2.0, - p
*floatd
*floatd
))*fone
;
316 } else if (flags
& SWS_SINC
) {
317 coeff
= (d
? sin(floatd
*M_PI
)/(floatd
*M_PI
) : 1.0)*fone
;
318 } else if (flags
& SWS_LANCZOS
) {
319 double p
= param
[0] != SWS_PARAM_DEFAULT
? param
[0] : 3.0;
320 coeff
= (d
? sin(floatd
*M_PI
)*sin(floatd
*M_PI
/p
)/(floatd
*floatd
*M_PI
*M_PI
/p
) : 1.0)*fone
;
321 if (floatd
>p
) coeff
=0;
322 } else if (flags
& SWS_BILINEAR
) {
324 if (coeff
<0) coeff
=0;
326 } else if (flags
& SWS_SPLINE
) {
327 double p
=-2.196152422706632;
328 coeff
= getSplineCoeff(1.0, 0.0, p
, -p
-1.0, floatd
) * fone
;
330 coeff
= 0.0; //GCC warning killer
334 filter
[i
*filterSize
+ j
]= coeff
;
341 /* apply src & dst Filter to filter -> filter2
344 assert(filterSize
>0);
345 filter2Size
= filterSize
;
346 if (srcFilter
) filter2Size
+= srcFilter
->length
- 1;
347 if (dstFilter
) filter2Size
+= dstFilter
->length
- 1;
348 assert(filter2Size
>0);
349 FF_ALLOCZ_OR_GOTO(NULL
, filter2
, filter2Size
*dstW
*sizeof(*filter2
), fail
);
351 for (i
=0; i
<dstW
; i
++) {
355 for (k
=0; k
<srcFilter
->length
; k
++) {
356 for (j
=0; j
<filterSize
; j
++)
357 filter2
[i
*filter2Size
+ k
+ j
] += srcFilter
->coeff
[k
]*filter
[i
*filterSize
+ j
];
360 for (j
=0; j
<filterSize
; j
++)
361 filter2
[i
*filter2Size
+ j
]= filter
[i
*filterSize
+ j
];
365 (*filterPos
)[i
]+= (filterSize
-1)/2 - (filter2Size
-1)/2;
369 /* try to reduce the filter-size (step1 find size and shift left) */
370 // Assume it is near normalized (*0.5 or *2.0 is OK but * 0.001 is not).
372 for (i
=dstW
-1; i
>=0; i
--) {
373 int min
= filter2Size
;
377 /* get rid of near zero elements on the left by shifting left */
378 for (j
=0; j
<filter2Size
; j
++) {
380 cutOff
+= FFABS(filter2
[i
*filter2Size
]);
382 if (cutOff
> SWS_MAX_REDUCE_CUTOFF
*fone
) break;
384 /* preserve monotonicity because the core can't handle the filter otherwise */
385 if (i
<dstW
-1 && (*filterPos
)[i
] >= (*filterPos
)[i
+1]) break;
387 // move filter coefficients left
388 for (k
=1; k
<filter2Size
; k
++)
389 filter2
[i
*filter2Size
+ k
- 1]= filter2
[i
*filter2Size
+ k
];
390 filter2
[i
*filter2Size
+ k
- 1]= 0;
395 /* count near zeros on the right */
396 for (j
=filter2Size
-1; j
>0; j
--) {
397 cutOff
+= FFABS(filter2
[i
*filter2Size
+ j
]);
399 if (cutOff
> SWS_MAX_REDUCE_CUTOFF
*fone
) break;
403 if (min
>minFilterSize
) minFilterSize
= min
;
406 if (flags
& SWS_CPU_CAPS_ALTIVEC
) {
407 // we can handle the special case 4,
408 // so we don't want to go to the full 8
409 if (minFilterSize
< 5)
412 // We really don't want to waste our time
413 // doing useless computation, so fall back on
414 // the scalar C code for very small filters.
415 // Vectorizing is worth it only if you have a
416 // decent-sized vector.
417 if (minFilterSize
< 3)
421 if (flags
& SWS_CPU_CAPS_MMX
) {
422 // special case for unscaled vertical filtering
423 if (minFilterSize
== 1 && filterAlign
== 2)
427 assert(minFilterSize
> 0);
428 filterSize
= (minFilterSize
+(filterAlign
-1)) & (~(filterAlign
-1));
429 assert(filterSize
> 0);
430 filter
= av_malloc(filterSize
*dstW
*sizeof(*filter
));
431 if (filterSize
>= MAX_FILTER_SIZE
*16/((flags
&SWS_ACCURATE_RND
) ? APCK_SIZE
: 16) || !filter
)
433 *outFilterSize
= filterSize
;
435 if (flags
&SWS_PRINT_INFO
)
436 av_log(NULL
, AV_LOG_VERBOSE
, "SwScaler: reducing / aligning filtersize %d -> %d\n", filter2Size
, filterSize
);
437 /* try to reduce the filter-size (step2 reduce it) */
438 for (i
=0; i
<dstW
; i
++) {
441 for (j
=0; j
<filterSize
; j
++) {
442 if (j
>=filter2Size
) filter
[i
*filterSize
+ j
]= 0;
443 else filter
[i
*filterSize
+ j
]= filter2
[i
*filter2Size
+ j
];
444 if((flags
& SWS_BITEXACT
) && j
>=minFilterSize
)
445 filter
[i
*filterSize
+ j
]= 0;
449 //FIXME try to align filterPos if possible
452 for (i
=0; i
<dstW
; i
++) {
454 if ((*filterPos
)[i
] < 0) {
455 // move filter coefficients left to compensate for filterPos
456 for (j
=1; j
<filterSize
; j
++) {
457 int left
= FFMAX(j
+ (*filterPos
)[i
], 0);
458 filter
[i
*filterSize
+ left
] += filter
[i
*filterSize
+ j
];
459 filter
[i
*filterSize
+ j
]=0;
464 if ((*filterPos
)[i
] + filterSize
> srcW
) {
465 int shift
= (*filterPos
)[i
] + filterSize
- srcW
;
466 // move filter coefficients right to compensate for filterPos
467 for (j
=filterSize
-2; j
>=0; j
--) {
468 int right
= FFMIN(j
+ shift
, filterSize
-1);
469 filter
[i
*filterSize
+right
] += filter
[i
*filterSize
+j
];
470 filter
[i
*filterSize
+j
]=0;
472 (*filterPos
)[i
]= srcW
- filterSize
;
476 // Note the +1 is for the MMX scaler which reads over the end
477 /* align at 16 for AltiVec (needed by hScale_altivec_real) */
478 FF_ALLOCZ_OR_GOTO(NULL
, *outFilter
, *outFilterSize
*(dstW
+1)*sizeof(int16_t), fail
);
480 /* normalize & store in outFilter */
481 for (i
=0; i
<dstW
; i
++) {
486 for (j
=0; j
<filterSize
; j
++) {
487 sum
+= filter
[i
*filterSize
+ j
];
489 sum
= (sum
+ one
/2)/ one
;
490 for (j
=0; j
<*outFilterSize
; j
++) {
491 int64_t v
= filter
[i
*filterSize
+ j
] + error
;
492 int intV
= ROUNDED_DIV(v
, sum
);
493 (*outFilter
)[i
*(*outFilterSize
) + j
]= intV
;
498 (*filterPos
)[dstW
]= (*filterPos
)[dstW
-1]; // the MMX scaler will read over the end
499 for (i
=0; i
<*outFilterSize
; i
++) {
500 int j
= dstW
*(*outFilterSize
);
501 (*outFilter
)[j
+ i
]= (*outFilter
)[j
+ i
- (*outFilterSize
)];
511 #if ARCH_X86 && (HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT)
512 static int initMMX2HScaler(int dstW
, int xInc
, uint8_t *filterCode
, int16_t *filter
, int32_t *filterPos
, int numSplits
)
515 x86_reg imm8OfPShufW1A
;
516 x86_reg imm8OfPShufW2A
;
517 x86_reg fragmentLengthA
;
519 x86_reg imm8OfPShufW1B
;
520 x86_reg imm8OfPShufW2B
;
521 x86_reg fragmentLengthB
;
526 // create an optimized horizontal scaling routine
527 /* This scaler is made of runtime-generated MMX2 code using specially
528 * tuned pshufw instructions. For every four output pixels, if four
529 * input pixels are enough for the fast bilinear scaling, then a chunk
530 * of fragmentB is used. If five input pixels are needed, then a chunk
531 * of fragmentA is used.
540 "movq (%%"REG_d
", %%"REG_a
"), %%mm3 \n\t"
541 "movd (%%"REG_c
", %%"REG_S
"), %%mm0 \n\t"
542 "movd 1(%%"REG_c
", %%"REG_S
"), %%mm1 \n\t"
543 "punpcklbw %%mm7, %%mm1 \n\t"
544 "punpcklbw %%mm7, %%mm0 \n\t"
545 "pshufw $0xFF, %%mm1, %%mm1 \n\t"
547 "pshufw $0xFF, %%mm0, %%mm0 \n\t"
549 "psubw %%mm1, %%mm0 \n\t"
550 "movl 8(%%"REG_b
", %%"REG_a
"), %%esi \n\t"
551 "pmullw %%mm3, %%mm0 \n\t"
552 "psllw $7, %%mm1 \n\t"
553 "paddw %%mm1, %%mm0 \n\t"
555 "movq %%mm0, (%%"REG_D
", %%"REG_a
") \n\t"
557 "add $8, %%"REG_a
" \n\t"
561 "lea " LOCAL_MANGLE(0b
) ", %0 \n\t"
562 "lea " LOCAL_MANGLE(1b
) ", %1 \n\t"
563 "lea " LOCAL_MANGLE(2b
) ", %2 \n\t"
568 "lea " LOCAL_MANGLE(9b
) ", %3 \n\t"
572 :"=r" (fragmentA
), "=r" (imm8OfPShufW1A
), "=r" (imm8OfPShufW2A
),
573 "=r" (fragmentLengthA
)
580 "movq (%%"REG_d
", %%"REG_a
"), %%mm3 \n\t"
581 "movd (%%"REG_c
", %%"REG_S
"), %%mm0 \n\t"
582 "punpcklbw %%mm7, %%mm0 \n\t"
583 "pshufw $0xFF, %%mm0, %%mm1 \n\t"
585 "pshufw $0xFF, %%mm0, %%mm0 \n\t"
587 "psubw %%mm1, %%mm0 \n\t"
588 "movl 8(%%"REG_b
", %%"REG_a
"), %%esi \n\t"
589 "pmullw %%mm3, %%mm0 \n\t"
590 "psllw $7, %%mm1 \n\t"
591 "paddw %%mm1, %%mm0 \n\t"
593 "movq %%mm0, (%%"REG_D
", %%"REG_a
") \n\t"
595 "add $8, %%"REG_a
" \n\t"
599 "lea " LOCAL_MANGLE(0b
) ", %0 \n\t"
600 "lea " LOCAL_MANGLE(1b
) ", %1 \n\t"
601 "lea " LOCAL_MANGLE(2b
) ", %2 \n\t"
606 "lea " LOCAL_MANGLE(9b
) ", %3 \n\t"
610 :"=r" (fragmentB
), "=r" (imm8OfPShufW1B
), "=r" (imm8OfPShufW2B
),
611 "=r" (fragmentLengthB
)
614 xpos
= 0; //lumXInc/2 - 0x8000; // difference between pixel centers
617 for (i
=0; i
<dstW
/numSplits
; i
++) {
622 int b
=((xpos
+xInc
)>>16) - xx
;
623 int c
=((xpos
+xInc
*2)>>16) - xx
;
624 int d
=((xpos
+xInc
*3)>>16) - xx
;
626 uint8_t *fragment
= (d
+1<4) ? fragmentB
: fragmentA
;
627 x86_reg imm8OfPShufW1
= (d
+1<4) ? imm8OfPShufW1B
: imm8OfPShufW1A
;
628 x86_reg imm8OfPShufW2
= (d
+1<4) ? imm8OfPShufW2B
: imm8OfPShufW2A
;
629 x86_reg fragmentLength
= (d
+1<4) ? fragmentLengthB
: fragmentLengthA
;
630 int maxShift
= 3-(d
+inc
);
634 filter
[i
] = (( xpos
& 0xFFFF) ^ 0xFFFF)>>9;
635 filter
[i
+1] = (((xpos
+xInc
) & 0xFFFF) ^ 0xFFFF)>>9;
636 filter
[i
+2] = (((xpos
+xInc
*2) & 0xFFFF) ^ 0xFFFF)>>9;
637 filter
[i
+3] = (((xpos
+xInc
*3) & 0xFFFF) ^ 0xFFFF)>>9;
640 memcpy(filterCode
+ fragmentPos
, fragment
, fragmentLength
);
642 filterCode
[fragmentPos
+ imm8OfPShufW1
]=
643 (a
+inc
) | ((b
+inc
)<<2) | ((c
+inc
)<<4) | ((d
+inc
)<<6);
644 filterCode
[fragmentPos
+ imm8OfPShufW2
]=
645 a
| (b
<<2) | (c
<<4) | (d
<<6);
647 if (i
+4-inc
>=dstW
) shift
=maxShift
; //avoid overread
648 else if ((filterPos
[i
/2]&3) <= maxShift
) shift
=filterPos
[i
/2]&3; //Align
650 if (shift
&& i
>=shift
) {
651 filterCode
[fragmentPos
+ imm8OfPShufW1
]+= 0x55*shift
;
652 filterCode
[fragmentPos
+ imm8OfPShufW2
]+= 0x55*shift
;
653 filterPos
[i
/2]-=shift
;
657 fragmentPos
+= fragmentLength
;
660 filterCode
[fragmentPos
]= RET
;
665 filterPos
[((i
/2)+1)&(~1)]= xpos
>>16; // needed to jump to the next part
667 return fragmentPos
+ 1;
669 #endif /* ARCH_X86 && (HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT) */
671 static void getSubSampleFactors(int *h
, int *v
, enum PixelFormat format
)
673 *h
= av_pix_fmt_descriptors
[format
].log2_chroma_w
;
674 *v
= av_pix_fmt_descriptors
[format
].log2_chroma_h
;
/* Forward declaration; the definition follows later in this file. */
static int update_flags_cpu(int flags);
/*
 * Store new colorspace settings in a context and rebuild the YUV->RGB tables.
 *
 * Copies four ints from inv_table and table into c->srcColorspaceTable and
 * c->dstColorspaceTable, then stores brightness, contrast, saturation,
 * srcRange and dstRange in the context. All of these are written BEFORE the
 * format check, so they are updated even when -1 is returned.
 *
 * Returns -1 when the destination format is YUV or gray. Otherwise the
 * bits-per-pixel fields and CPU flags are refreshed, ff_yuv2rgb_c_init_tables()
 * is called, and the AltiVec table initializer is called when
 * SWS_CPU_CAPS_ALTIVEC is set in c->flags.
 *
 * NOTE(review): the statements after the AltiVec call (including the success
 * return value) and any preprocessor guard around that call are not present
 * in this copy of the file -- confirm against upstream.
 */
679 int sws_setColorspaceDetails(SwsContext
*c
, const int inv_table
[4], int srcRange
, const int table
[4], int dstRange
, int brightness
, int contrast
, int saturation
)
681 memcpy(c
->srcColorspaceTable
, inv_table
, sizeof(int)*4);
682 memcpy(c
->dstColorspaceTable
, table
, sizeof(int)*4);
684 c
->brightness
= brightness
;
685 c
->contrast
= contrast
;
686 c
->saturation
= saturation
;
687 c
->srcRange
= srcRange
;
688 c
->dstRange
= dstRange
;
689 if (isYUV(c
->dstFormat
) || isGray(c
->dstFormat
)) return -1;
691 c
->dstFormatBpp
= av_get_bits_per_pixel(&av_pix_fmt_descriptors
[c
->dstFormat
]);
692 c
->srcFormatBpp
= av_get_bits_per_pixel(&av_pix_fmt_descriptors
[c
->srcFormat
]);
693 c
->flags
= update_flags_cpu(c
->flags
);
695 ff_yuv2rgb_c_init_tables(c
, inv_table
, srcRange
, brightness
, contrast
, saturation
);
699 if (c
->flags
& SWS_CPU_CAPS_ALTIVEC
)
700 ff_yuv2rgb_init_tables_altivec(c
, inv_table
, brightness
, contrast
, saturation
);
705 int sws_getColorspaceDetails(SwsContext
*c
, int **inv_table
, int *srcRange
, int **table
, int *dstRange
, int *brightness
, int *contrast
, int *saturation
)
707 if (isYUV(c
->dstFormat
) || isGray(c
->dstFormat
)) return -1;
709 *inv_table
= c
->srcColorspaceTable
;
710 *table
= c
->dstColorspaceTable
;
711 *srcRange
= c
->srcRange
;
712 *dstRange
= c
->dstRange
;
713 *brightness
= c
->brightness
;
714 *contrast
= c
->contrast
;
715 *saturation
= c
->saturation
;
/*
 * Replace a YUVJ* pixel format by the plain YUV format with the same
 * subsampling suffix (YUVJ420P -> YUV420P, YUVJ422P -> YUV422P,
 * YUVJ444P -> YUV444P, YUVJ440P -> YUV440P). The new value is written
 * through format and 1 is returned for each of these cases.
 * NOTE(review): the switch header and the path taken for all other formats
 * are not present in this copy of the file -- presumably *format is left
 * alone and 0 is returned; confirm against upstream.
 */
720 static int handle_jpeg(enum PixelFormat
*format
)
723 case PIX_FMT_YUVJ420P
: *format
= PIX_FMT_YUV420P
; return 1;
724 case PIX_FMT_YUVJ422P
: *format
= PIX_FMT_YUV422P
; return 1;
725 case PIX_FMT_YUVJ444P
: *format
= PIX_FMT_YUV444P
; return 1;
726 case PIX_FMT_YUVJ440P
: *format
= PIX_FMT_YUV440P
; return 1;
/*
 * Adjust the CPU-capability bits of a flags word.
 * When CONFIG_RUNTIME_CPUDETECT is off, the SWS_CPU_CAPS_* bits named in the
 * mask are cleared from flags and the bits returned by ff_hardcodedcpuflags()
 * are OR-ed in, so the flags match the compiled variant.
 * NOTE(review): part of the cleared mask and the return statement are not
 * present in this copy of the file -- presumably the adjusted flags are
 * returned (callers assign the result to c->flags); confirm against upstream.
 */
731 static int update_flags_cpu(int flags
)
733 #if !CONFIG_RUNTIME_CPUDETECT //ensure that the flags match the compiled variant if cpudetect is off
734 flags
&= ~( SWS_CPU_CAPS_MMX
738 |SWS_CPU_CAPS_ALTIVEC
740 flags
|= ff_hardcodedcpuflags();
741 #endif /* CONFIG_RUNTIME_CPUDETECT */
745 SwsContext
*sws_alloc_context(void)
747 SwsContext
*c
= av_mallocz(sizeof(SwsContext
));
749 c
->av_class
= &sws_context_class
;
750 av_opt_set_defaults(c
);
755 int sws_init_context(SwsContext
*c
, SwsFilter
*srcFilter
, SwsFilter
*dstFilter
)
758 int usesVFilter
, usesHFilter
;
760 SwsFilter dummyFilter
= {NULL
, NULL
, NULL
, NULL
};
766 enum PixelFormat srcFormat
= c
->srcFormat
;
767 enum PixelFormat dstFormat
= c
->dstFormat
;
769 flags
= c
->flags
= update_flags_cpu(c
->flags
);
771 if (flags
& SWS_CPU_CAPS_MMX
)
772 __asm__
volatile("emms\n\t"::: "memory");
774 if (!rgb15to16
) sws_rgb2rgb_init(flags
);
776 unscaled
= (srcW
== dstW
&& srcH
== dstH
);
778 if (!isSupportedIn(srcFormat
)) {
779 av_log(NULL
, AV_LOG_ERROR
, "swScaler: %s is not supported as input pixel format\n", sws_format_name(srcFormat
));
780 return AVERROR(EINVAL
);
782 if (!isSupportedOut(dstFormat
)) {
783 av_log(NULL
, AV_LOG_ERROR
, "swScaler: %s is not supported as output pixel format\n", sws_format_name(dstFormat
));
784 return AVERROR(EINVAL
);
787 i
= flags
& ( SWS_POINT
798 if(!i
|| (i
& (i
-1))) {
799 av_log(NULL
, AV_LOG_ERROR
, "swScaler: Exactly one scaler algorithm must be chosen\n");
800 return AVERROR(EINVAL
);
803 if (srcW
<4 || srcH
<1 || dstW
<8 || dstH
<1) { //FIXME check if these are enough and try to lowwer them after fixing the relevant parts of the code
804 av_log(NULL
, AV_LOG_ERROR
, "swScaler: %dx%d -> %dx%d is invalid scaling dimension\n",
805 srcW
, srcH
, dstW
, dstH
);
806 return AVERROR(EINVAL
);
808 if(srcW
> VOFW
|| dstW
> VOFW
) {
809 av_log(NULL
, AV_LOG_ERROR
, "swScaler: Compile-time maximum width is "AV_STRINGIFY(VOFW
)" change VOF/VOFW and recompile\n");
810 return AVERROR(EINVAL
);
813 if (!dstFilter
) dstFilter
= &dummyFilter
;
814 if (!srcFilter
) srcFilter
= &dummyFilter
;
816 c
->lumXInc
= ((srcW
<<16) + (dstW
>>1))/dstW
;
817 c
->lumYInc
= ((srcH
<<16) + (dstH
>>1))/dstH
;
818 c
->dstFormatBpp
= av_get_bits_per_pixel(&av_pix_fmt_descriptors
[dstFormat
]);
819 c
->srcFormatBpp
= av_get_bits_per_pixel(&av_pix_fmt_descriptors
[srcFormat
]);
820 c
->vRounder
= 4* 0x0001000100010001ULL
;
822 usesVFilter
= (srcFilter
->lumV
&& srcFilter
->lumV
->length
>1) ||
823 (srcFilter
->chrV
&& srcFilter
->chrV
->length
>1) ||
824 (dstFilter
->lumV
&& dstFilter
->lumV
->length
>1) ||
825 (dstFilter
->chrV
&& dstFilter
->chrV
->length
>1);
826 usesHFilter
= (srcFilter
->lumH
&& srcFilter
->lumH
->length
>1) ||
827 (srcFilter
->chrH
&& srcFilter
->chrH
->length
>1) ||
828 (dstFilter
->lumH
&& dstFilter
->lumH
->length
>1) ||
829 (dstFilter
->chrH
&& dstFilter
->chrH
->length
>1);
831 getSubSampleFactors(&c
->chrSrcHSubSample
, &c
->chrSrcVSubSample
, srcFormat
);
832 getSubSampleFactors(&c
->chrDstHSubSample
, &c
->chrDstVSubSample
, dstFormat
);
834 // reuse chroma for 2 pixels RGB/BGR unless user wants full chroma interpolation
835 if (isAnyRGB(dstFormat
) && !(flags
&SWS_FULL_CHR_H_INT
)) c
->chrDstHSubSample
=1;
837 // drop some chroma lines if the user wants it
838 c
->vChrDrop
= (flags
&SWS_SRC_V_CHR_DROP_MASK
)>>SWS_SRC_V_CHR_DROP_SHIFT
;
839 c
->chrSrcVSubSample
+= c
->vChrDrop
;
841 // drop every other pixel for chroma calculation unless user wants full chroma
842 if (isAnyRGB(srcFormat
) && !(flags
&SWS_FULL_CHR_H_INP
)
843 && srcFormat
!=PIX_FMT_RGB8
&& srcFormat
!=PIX_FMT_BGR8
844 && srcFormat
!=PIX_FMT_RGB4
&& srcFormat
!=PIX_FMT_BGR4
845 && srcFormat
!=PIX_FMT_RGB4_BYTE
&& srcFormat
!=PIX_FMT_BGR4_BYTE
846 && ((dstW
>>c
->chrDstHSubSample
) <= (srcW
>>1) || (flags
&SWS_FAST_BILINEAR
)))
847 c
->chrSrcHSubSample
=1;
849 // Note the -((-x)>>y) is so that we always round toward +inf.
850 c
->chrSrcW
= -((-srcW
) >> c
->chrSrcHSubSample
);
851 c
->chrSrcH
= -((-srcH
) >> c
->chrSrcVSubSample
);
852 c
->chrDstW
= -((-dstW
) >> c
->chrDstHSubSample
);
853 c
->chrDstH
= -((-dstH
) >> c
->chrDstVSubSample
);
855 /* unscaled special cases */
856 if (unscaled
&& !usesHFilter
&& !usesVFilter
&& (c
->srcRange
== c
->dstRange
|| isAnyRGB(dstFormat
))) {
857 ff_get_unscaled_swscale(c
);
860 if (flags
&SWS_PRINT_INFO
)
861 av_log(c
, AV_LOG_INFO
, "using unscaled %s -> %s special converter\n",
862 sws_format_name(srcFormat
), sws_format_name(dstFormat
));
867 if (flags
& SWS_CPU_CAPS_MMX2
) {
868 c
->canMMX2BeUsed
= (dstW
>=srcW
&& (dstW
&31)==0 && (srcW
&15)==0) ? 1 : 0;
869 if (!c
->canMMX2BeUsed
&& dstW
>=srcW
&& (srcW
&15)==0 && (flags
&SWS_FAST_BILINEAR
)) {
870 if (flags
&SWS_PRINT_INFO
)
871 av_log(c
, AV_LOG_INFO
, "output width is not a multiple of 32 -> no MMX2 scaler\n");
873 if (usesHFilter
) c
->canMMX2BeUsed
=0;
878 c
->chrXInc
= ((c
->chrSrcW
<<16) + (c
->chrDstW
>>1))/c
->chrDstW
;
879 c
->chrYInc
= ((c
->chrSrcH
<<16) + (c
->chrDstH
>>1))/c
->chrDstH
;
881 // match pixel 0 of the src to pixel 0 of dst and match pixel n-2 of src to pixel n-2 of dst
882 // but only for the FAST_BILINEAR mode otherwise do correct scaling
883 // n-2 is the last chrominance sample available
884 // this is not perfect, but no one should notice the difference, the more correct variant
885 // would be like the vertical one, but that would require some special code for the
886 // first and last pixel
887 if (flags
&SWS_FAST_BILINEAR
) {
888 if (c
->canMMX2BeUsed
) {
892 //we don't use the x86 asm scaler if MMX is available
893 else if (flags
& SWS_CPU_CAPS_MMX
) {
894 c
->lumXInc
= ((srcW
-2)<<16)/(dstW
-2) - 20;
895 c
->chrXInc
= ((c
->chrSrcW
-2)<<16)/(c
->chrDstW
-2) - 20;
899 /* precalculate horizontal scaler filter coefficients */
901 #if ARCH_X86 && (HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT)
902 // can't downscale !!!
903 if (c
->canMMX2BeUsed
&& (flags
& SWS_FAST_BILINEAR
)) {
904 c
->lumMmx2FilterCodeSize
= initMMX2HScaler( dstW
, c
->lumXInc
, NULL
, NULL
, NULL
, 8);
905 c
->chrMmx2FilterCodeSize
= initMMX2HScaler(c
->chrDstW
, c
->chrXInc
, NULL
, NULL
, NULL
, 4);
908 c
->lumMmx2FilterCode
= mmap(NULL
, c
->lumMmx2FilterCodeSize
, PROT_READ
| PROT_WRITE
, MAP_PRIVATE
| MAP_ANONYMOUS
, -1, 0);
909 c
->chrMmx2FilterCode
= mmap(NULL
, c
->chrMmx2FilterCodeSize
, PROT_READ
| PROT_WRITE
, MAP_PRIVATE
| MAP_ANONYMOUS
, -1, 0);
910 #elif HAVE_VIRTUALALLOC
911 c
->lumMmx2FilterCode
= VirtualAlloc(NULL
, c
->lumMmx2FilterCodeSize
, MEM_COMMIT
, PAGE_EXECUTE_READWRITE
);
912 c
->chrMmx2FilterCode
= VirtualAlloc(NULL
, c
->chrMmx2FilterCodeSize
, MEM_COMMIT
, PAGE_EXECUTE_READWRITE
);
914 c
->lumMmx2FilterCode
= av_malloc(c
->lumMmx2FilterCodeSize
);
915 c
->chrMmx2FilterCode
= av_malloc(c
->chrMmx2FilterCodeSize
);
918 if (!c
->lumMmx2FilterCode
|| !c
->chrMmx2FilterCode
)
919 return AVERROR(ENOMEM
);
920 FF_ALLOCZ_OR_GOTO(c
, c
->hLumFilter
, (dstW
/8+8)*sizeof(int16_t), fail
);
921 FF_ALLOCZ_OR_GOTO(c
, c
->hChrFilter
, (c
->chrDstW
/4+8)*sizeof(int16_t), fail
);
922 FF_ALLOCZ_OR_GOTO(c
, c
->hLumFilterPos
, (dstW
/2/8+8)*sizeof(int32_t), fail
);
923 FF_ALLOCZ_OR_GOTO(c
, c
->hChrFilterPos
, (c
->chrDstW
/2/4+8)*sizeof(int32_t), fail
);
925 initMMX2HScaler( dstW
, c
->lumXInc
, c
->lumMmx2FilterCode
, c
->hLumFilter
, c
->hLumFilterPos
, 8);
926 initMMX2HScaler(c
->chrDstW
, c
->chrXInc
, c
->chrMmx2FilterCode
, c
->hChrFilter
, c
->hChrFilterPos
, 4);
929 mprotect(c
->lumMmx2FilterCode
, c
->lumMmx2FilterCodeSize
, PROT_EXEC
| PROT_READ
);
930 mprotect(c
->chrMmx2FilterCode
, c
->chrMmx2FilterCodeSize
, PROT_EXEC
| PROT_READ
);
933 #endif /* ARCH_X86 && (HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT) */
935 const int filterAlign
=
936 (flags
& SWS_CPU_CAPS_MMX
) ? 4 :
937 (flags
& SWS_CPU_CAPS_ALTIVEC
) ? 8 :
940 if (initFilter(&c
->hLumFilter
, &c
->hLumFilterPos
, &c
->hLumFilterSize
, c
->lumXInc
,
941 srcW
, dstW
, filterAlign
, 1<<14,
942 (flags
&SWS_BICUBLIN
) ? (flags
|SWS_BICUBIC
) : flags
,
943 srcFilter
->lumH
, dstFilter
->lumH
, c
->param
) < 0)
945 if (initFilter(&c
->hChrFilter
, &c
->hChrFilterPos
, &c
->hChrFilterSize
, c
->chrXInc
,
946 c
->chrSrcW
, c
->chrDstW
, filterAlign
, 1<<14,
947 (flags
&SWS_BICUBLIN
) ? (flags
|SWS_BILINEAR
) : flags
,
948 srcFilter
->chrH
, dstFilter
->chrH
, c
->param
) < 0)
951 } // initialize horizontal stuff
953 /* precalculate vertical scaler filter coefficients */
955 const int filterAlign
=
956 (flags
& SWS_CPU_CAPS_MMX
) && (flags
& SWS_ACCURATE_RND
) ? 2 :
957 (flags
& SWS_CPU_CAPS_ALTIVEC
) ? 8 :
960 if (initFilter(&c
->vLumFilter
, &c
->vLumFilterPos
, &c
->vLumFilterSize
, c
->lumYInc
,
961 srcH
, dstH
, filterAlign
, (1<<12),
962 (flags
&SWS_BICUBLIN
) ? (flags
|SWS_BICUBIC
) : flags
,
963 srcFilter
->lumV
, dstFilter
->lumV
, c
->param
) < 0)
965 if (initFilter(&c
->vChrFilter
, &c
->vChrFilterPos
, &c
->vChrFilterSize
, c
->chrYInc
,
966 c
->chrSrcH
, c
->chrDstH
, filterAlign
, (1<<12),
967 (flags
&SWS_BICUBLIN
) ? (flags
|SWS_BILINEAR
) : flags
,
968 srcFilter
->chrV
, dstFilter
->chrV
, c
->param
) < 0)
972 FF_ALLOC_OR_GOTO(c
, c
->vYCoeffsBank
, sizeof (vector
signed short)*c
->vLumFilterSize
*c
->dstH
, fail
);
973 FF_ALLOC_OR_GOTO(c
, c
->vCCoeffsBank
, sizeof (vector
signed short)*c
->vChrFilterSize
*c
->chrDstH
, fail
);
975 for (i
=0;i
<c
->vLumFilterSize
*c
->dstH
;i
++) {
977 short *p
= (short *)&c
->vYCoeffsBank
[i
];
979 p
[j
] = c
->vLumFilter
[i
];
982 for (i
=0;i
<c
->vChrFilterSize
*c
->chrDstH
;i
++) {
984 short *p
= (short *)&c
->vCCoeffsBank
[i
];
986 p
[j
] = c
->vChrFilter
[i
];
991 // calculate buffer sizes so that they won't run out while handling these damn slices
992 c
->vLumBufSize
= c
->vLumFilterSize
;
993 c
->vChrBufSize
= c
->vChrFilterSize
;
994 for (i
=0; i
<dstH
; i
++) {
995 int chrI
= i
*c
->chrDstH
/ dstH
;
996 int nextSlice
= FFMAX(c
->vLumFilterPos
[i
] + c
->vLumFilterSize
- 1,
997 ((c
->vChrFilterPos
[chrI
] + c
->vChrFilterSize
- 1)<<c
->chrSrcVSubSample
));
999 nextSlice
>>= c
->chrSrcVSubSample
;
1000 nextSlice
<<= c
->chrSrcVSubSample
;
1001 if (c
->vLumFilterPos
[i
] + c
->vLumBufSize
< nextSlice
)
1002 c
->vLumBufSize
= nextSlice
- c
->vLumFilterPos
[i
];
1003 if (c
->vChrFilterPos
[chrI
] + c
->vChrBufSize
< (nextSlice
>>c
->chrSrcVSubSample
))
1004 c
->vChrBufSize
= (nextSlice
>>c
->chrSrcVSubSample
) - c
->vChrFilterPos
[chrI
];
1007 // allocate pixbufs (we use dynamic allocation because otherwise we would need to
1008 // allocate several megabytes to handle all possible cases)
1009 FF_ALLOC_OR_GOTO(c
, c
->lumPixBuf
, c
->vLumBufSize
*2*sizeof(int16_t*), fail
);
1010 FF_ALLOC_OR_GOTO(c
, c
->chrPixBuf
, c
->vChrBufSize
*2*sizeof(int16_t*), fail
);
1011 if (CONFIG_SWSCALE_ALPHA
&& isALPHA(c
->srcFormat
) && isALPHA(c
->dstFormat
))
1012 FF_ALLOCZ_OR_GOTO(c
, c
->alpPixBuf
, c
->vLumBufSize
*2*sizeof(int16_t*), fail
);
1013 //Note we need at least one pixel more at the end because of the MMX code (just in case someone wanna replace the 4000/8000)
1014 /* align at 16 bytes for AltiVec */
1015 for (i
=0; i
<c
->vLumBufSize
; i
++) {
1016 FF_ALLOCZ_OR_GOTO(c
, c
->lumPixBuf
[i
+c
->vLumBufSize
], VOF
+1, fail
);
1017 c
->lumPixBuf
[i
] = c
->lumPixBuf
[i
+c
->vLumBufSize
];
1019 for (i
=0; i
<c
->vChrBufSize
; i
++) {
1020 FF_ALLOC_OR_GOTO(c
, c
->chrPixBuf
[i
+c
->vChrBufSize
], (VOF
+1)*2, fail
);
1021 c
->chrPixBuf
[i
] = c
->chrPixBuf
[i
+c
->vChrBufSize
];
1023 if (CONFIG_SWSCALE_ALPHA
&& c
->alpPixBuf
)
1024 for (i
=0; i
<c
->vLumBufSize
; i
++) {
1025 FF_ALLOCZ_OR_GOTO(c
, c
->alpPixBuf
[i
+c
->vLumBufSize
], VOF
+1, fail
);
1026 c
->alpPixBuf
[i
] = c
->alpPixBuf
[i
+c
->vLumBufSize
];
1029 //try to avoid drawing green stuff between the right end and the stride end
1030 for (i
=0; i
<c
->vChrBufSize
; i
++) memset(c
->chrPixBuf
[i
], 64, (VOF
+1)*2);
1032 assert(2*VOFW
== VOF
);
1034 assert(c
->chrDstH
<= dstH
);
1036 if (flags
&SWS_PRINT_INFO
) {
1037 if (flags
&SWS_FAST_BILINEAR
) av_log(c
, AV_LOG_INFO
, "FAST_BILINEAR scaler, ");
1038 else if (flags
&SWS_BILINEAR
) av_log(c
, AV_LOG_INFO
, "BILINEAR scaler, ");
1039 else if (flags
&SWS_BICUBIC
) av_log(c
, AV_LOG_INFO
, "BICUBIC scaler, ");
1040 else if (flags
&SWS_X
) av_log(c
, AV_LOG_INFO
, "Experimental scaler, ");
1041 else if (flags
&SWS_POINT
) av_log(c
, AV_LOG_INFO
, "Nearest Neighbor / POINT scaler, ");
1042 else if (flags
&SWS_AREA
) av_log(c
, AV_LOG_INFO
, "Area Averaging scaler, ");
1043 else if (flags
&SWS_BICUBLIN
) av_log(c
, AV_LOG_INFO
, "luma BICUBIC / chroma BILINEAR scaler, ");
1044 else if (flags
&SWS_GAUSS
) av_log(c
, AV_LOG_INFO
, "Gaussian scaler, ");
1045 else if (flags
&SWS_SINC
) av_log(c
, AV_LOG_INFO
, "Sinc scaler, ");
1046 else if (flags
&SWS_LANCZOS
) av_log(c
, AV_LOG_INFO
, "Lanczos scaler, ");
1047 else if (flags
&SWS_SPLINE
) av_log(c
, AV_LOG_INFO
, "Bicubic spline scaler, ");
1048 else av_log(c
, AV_LOG_INFO
, "ehh flags invalid?! ");
1050 av_log(c
, AV_LOG_INFO
, "from %s to %s%s ",
1051 sws_format_name(srcFormat
),
1053 dstFormat
== PIX_FMT_BGR555
|| dstFormat
== PIX_FMT_BGR565
||
1054 dstFormat
== PIX_FMT_RGB444BE
|| dstFormat
== PIX_FMT_RGB444LE
||
1055 dstFormat
== PIX_FMT_BGR444BE
|| dstFormat
== PIX_FMT_BGR444LE
? "dithered " : "",
1059 sws_format_name(dstFormat
));
1061 if (flags
& SWS_CPU_CAPS_MMX2
) av_log(c
, AV_LOG_INFO
, "using MMX2\n");
1062 else if (flags
& SWS_CPU_CAPS_3DNOW
) av_log(c
, AV_LOG_INFO
, "using 3DNOW\n");
1063 else if (flags
& SWS_CPU_CAPS_MMX
) av_log(c
, AV_LOG_INFO
, "using MMX\n");
1064 else if (flags
& SWS_CPU_CAPS_ALTIVEC
) av_log(c
, AV_LOG_INFO
, "using AltiVec\n");
1065 else av_log(c
, AV_LOG_INFO
, "using C\n");
1067 if (flags
& SWS_CPU_CAPS_MMX
) {
1068 if (c
->canMMX2BeUsed
&& (flags
&SWS_FAST_BILINEAR
))
1069 av_log(c
, AV_LOG_VERBOSE
, "using FAST_BILINEAR MMX2 scaler for horizontal scaling\n");
1071 if (c
->hLumFilterSize
==4)
1072 av_log(c
, AV_LOG_VERBOSE
, "using 4-tap MMX scaler for horizontal luminance scaling\n");
1073 else if (c
->hLumFilterSize
==8)
1074 av_log(c
, AV_LOG_VERBOSE
, "using 8-tap MMX scaler for horizontal luminance scaling\n");
1076 av_log(c
, AV_LOG_VERBOSE
, "using n-tap MMX scaler for horizontal luminance scaling\n");
1078 if (c
->hChrFilterSize
==4)
1079 av_log(c
, AV_LOG_VERBOSE
, "using 4-tap MMX scaler for horizontal chrominance scaling\n");
1080 else if (c
->hChrFilterSize
==8)
1081 av_log(c
, AV_LOG_VERBOSE
, "using 8-tap MMX scaler for horizontal chrominance scaling\n");
1083 av_log(c
, AV_LOG_VERBOSE
, "using n-tap MMX scaler for horizontal chrominance scaling\n");
1087 av_log(c
, AV_LOG_VERBOSE
, "using x86 asm scaler for horizontal scaling\n");
1089 if (flags
& SWS_FAST_BILINEAR
)
1090 av_log(c
, AV_LOG_VERBOSE
, "using FAST_BILINEAR C scaler for horizontal scaling\n");
1092 av_log(c
, AV_LOG_VERBOSE
, "using C scaler for horizontal scaling\n");
1095 if (isPlanarYUV(dstFormat
)) {
1096 if (c
->vLumFilterSize
==1)
1097 av_log(c
, AV_LOG_VERBOSE
, "using 1-tap %s \"scaler\" for vertical scaling (YV12 like)\n", (flags
& SWS_CPU_CAPS_MMX
) ? "MMX" : "C");
1099 av_log(c
, AV_LOG_VERBOSE
, "using n-tap %s scaler for vertical scaling (YV12 like)\n", (flags
& SWS_CPU_CAPS_MMX
) ? "MMX" : "C");
1101 if (c
->vLumFilterSize
==1 && c
->vChrFilterSize
==2)
1102 av_log(c
, AV_LOG_VERBOSE
, "using 1-tap %s \"scaler\" for vertical luminance scaling (BGR)\n"
1103 " 2-tap scaler for vertical chrominance scaling (BGR)\n", (flags
& SWS_CPU_CAPS_MMX
) ? "MMX" : "C");
1104 else if (c
->vLumFilterSize
==2 && c
->vChrFilterSize
==2)
1105 av_log(c
, AV_LOG_VERBOSE
, "using 2-tap linear %s scaler for vertical scaling (BGR)\n", (flags
& SWS_CPU_CAPS_MMX
) ? "MMX" : "C");
1107 av_log(c
, AV_LOG_VERBOSE
, "using n-tap %s scaler for vertical scaling (BGR)\n", (flags
& SWS_CPU_CAPS_MMX
) ? "MMX" : "C");
1110 if (dstFormat
==PIX_FMT_BGR24
)
1111 av_log(c
, AV_LOG_VERBOSE
, "using %s YV12->BGR24 converter\n",
1112 (flags
& SWS_CPU_CAPS_MMX2
) ? "MMX2" : ((flags
& SWS_CPU_CAPS_MMX
) ? "MMX" : "C"));
1113 else if (dstFormat
==PIX_FMT_RGB32
)
1114 av_log(c
, AV_LOG_VERBOSE
, "using %s YV12->BGR32 converter\n", (flags
& SWS_CPU_CAPS_MMX
) ? "MMX" : "C");
1115 else if (dstFormat
==PIX_FMT_BGR565
)
1116 av_log(c
, AV_LOG_VERBOSE
, "using %s YV12->BGR16 converter\n", (flags
& SWS_CPU_CAPS_MMX
) ? "MMX" : "C");
1117 else if (dstFormat
==PIX_FMT_BGR555
)
1118 av_log(c
, AV_LOG_VERBOSE
, "using %s YV12->BGR15 converter\n", (flags
& SWS_CPU_CAPS_MMX
) ? "MMX" : "C");
1119 else if (dstFormat
== PIX_FMT_RGB444BE
|| dstFormat
== PIX_FMT_RGB444LE
||
1120 dstFormat
== PIX_FMT_BGR444BE
|| dstFormat
== PIX_FMT_BGR444LE
)
1121 av_log(c
, AV_LOG_VERBOSE
, "using %s YV12->BGR12 converter\n", (flags
& SWS_CPU_CAPS_MMX
) ? "MMX" : "C");
1123 av_log(c
, AV_LOG_VERBOSE
, "%dx%d -> %dx%d\n", srcW
, srcH
, dstW
, dstH
);
1124 av_log(c
, AV_LOG_DEBUG
, "lum srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
1125 c
->srcW
, c
->srcH
, c
->dstW
, c
->dstH
, c
->lumXInc
, c
->lumYInc
);
1126 av_log(c
, AV_LOG_DEBUG
, "chr srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
1127 c
->chrSrcW
, c
->chrSrcH
, c
->chrDstW
, c
->chrDstH
, c
->chrXInc
, c
->chrYInc
);
1130 c
->swScale
= ff_getSwsFunc(c
);
1132 fail
: //FIXME replace things by appropriate error codes
#if FF_API_SWS_GETCONTEXT
/**
 * Allocate and initialize a scaling context in one call.
 *
 * The geometry, pixel formats, flags and (optional) scaler parameters are
 * copied into a freshly allocated context, default colorspace details are
 * applied, and the context is initialized with the given filters.
 *
 * @param param optional pair of scaler tuning parameters; may be NULL
 * @return the ready-to-use context, or NULL if allocation or
 *         initialization failed
 */
SwsContext *sws_getContext(int srcW, int srcH, enum PixelFormat srcFormat,
                           int dstW, int dstH, enum PixelFormat dstFormat, int flags,
                           SwsFilter *srcFilter, SwsFilter *dstFilter, const double *param)
{
    SwsContext *c = sws_alloc_context();

    if (c == NULL)
        return NULL;

    c->flags = flags;
    c->srcW  = srcW;
    c->srcH  = srcH;
    c->dstW  = dstW;
    c->dstH  = dstH;

    /* handle_jpeg() may rewrite the format it is given, so the ranges are
     * derived before the (possibly adjusted) formats are stored. */
    c->srcRange  = handle_jpeg(&srcFormat);
    c->dstRange  = handle_jpeg(&dstFormat);
    c->srcFormat = srcFormat;
    c->dstFormat = dstFormat;

    if (param != NULL) {
        c->param[0] = param[0];
        c->param[1] = param[1];
    }

    sws_setColorspaceDetails(c, ff_yuv2rgb_coeffs[SWS_CS_DEFAULT], c->srcRange,
                             ff_yuv2rgb_coeffs[SWS_CS_DEFAULT] /* FIXME*/, c->dstRange,
                             0, 1<<16, 1<<16);

    if (sws_init_context(c, srcFilter, dstFilter) >= 0)
        return c;

    /* Initialization failed: release everything allocated so far. */
    sws_freeContext(c);
    return NULL;
}
#endif
1171 SwsFilter
*sws_getDefaultFilter(float lumaGBlur
, float chromaGBlur
,
1172 float lumaSharpen
, float chromaSharpen
,
1173 float chromaHShift
, float chromaVShift
,
1176 SwsFilter
*filter
= av_malloc(sizeof(SwsFilter
));
1180 if (lumaGBlur
!=0.0) {
1181 filter
->lumH
= sws_getGaussianVec(lumaGBlur
, 3.0);
1182 filter
->lumV
= sws_getGaussianVec(lumaGBlur
, 3.0);
1184 filter
->lumH
= sws_getIdentityVec();
1185 filter
->lumV
= sws_getIdentityVec();
1188 if (chromaGBlur
!=0.0) {
1189 filter
->chrH
= sws_getGaussianVec(chromaGBlur
, 3.0);
1190 filter
->chrV
= sws_getGaussianVec(chromaGBlur
, 3.0);
1192 filter
->chrH
= sws_getIdentityVec();
1193 filter
->chrV
= sws_getIdentityVec();
1196 if (chromaSharpen
!=0.0) {
1197 SwsVector
*id
= sws_getIdentityVec();
1198 sws_scaleVec(filter
->chrH
, -chromaSharpen
);
1199 sws_scaleVec(filter
->chrV
, -chromaSharpen
);
1200 sws_addVec(filter
->chrH
, id
);
1201 sws_addVec(filter
->chrV
, id
);
1205 if (lumaSharpen
!=0.0) {
1206 SwsVector
*id
= sws_getIdentityVec();
1207 sws_scaleVec(filter
->lumH
, -lumaSharpen
);
1208 sws_scaleVec(filter
->lumV
, -lumaSharpen
);
1209 sws_addVec(filter
->lumH
, id
);
1210 sws_addVec(filter
->lumV
, id
);
1214 if (chromaHShift
!= 0.0)
1215 sws_shiftVec(filter
->chrH
, (int)(chromaHShift
+0.5));
1217 if (chromaVShift
!= 0.0)
1218 sws_shiftVec(filter
->chrV
, (int)(chromaVShift
+0.5));
1220 sws_normalizeVec(filter
->chrH
, 1.0);
1221 sws_normalizeVec(filter
->chrV
, 1.0);
1222 sws_normalizeVec(filter
->lumH
, 1.0);
1223 sws_normalizeVec(filter
->lumV
, 1.0);
1225 if (verbose
) sws_printVec2(filter
->chrH
, NULL
, AV_LOG_DEBUG
);
1226 if (verbose
) sws_printVec2(filter
->lumH
, NULL
, AV_LOG_DEBUG
);
1231 SwsVector
*sws_allocVec(int length
)
1233 SwsVector
*vec
= av_malloc(sizeof(SwsVector
));
1236 vec
->length
= length
;
1237 vec
->coeff
= av_malloc(sizeof(double) * length
);
1243 SwsVector
*sws_getGaussianVec(double variance
, double quality
)
1245 const int length
= (int)(variance
*quality
+ 0.5) | 1;
1247 double middle
= (length
-1)*0.5;
1248 SwsVector
*vec
= sws_allocVec(length
);
1253 for (i
=0; i
<length
; i
++) {
1254 double dist
= i
-middle
;
1255 vec
->coeff
[i
]= exp(-dist
*dist
/(2*variance
*variance
)) / sqrt(2*variance
*M_PI
);
1258 sws_normalizeVec(vec
, 1.0);
1263 SwsVector
*sws_getConstVec(double c
, int length
)
1266 SwsVector
*vec
= sws_allocVec(length
);
1271 for (i
=0; i
<length
; i
++)
1277 SwsVector
*sws_getIdentityVec(void)
1279 return sws_getConstVec(1.0, 1);
1282 static double sws_dcVec(SwsVector
*a
)
1287 for (i
=0; i
<a
->length
; i
++)
1293 void sws_scaleVec(SwsVector
*a
, double scalar
)
1297 for (i
=0; i
<a
->length
; i
++)
1298 a
->coeff
[i
]*= scalar
;
1301 void sws_normalizeVec(SwsVector
*a
, double height
)
1303 sws_scaleVec(a
, height
/sws_dcVec(a
));
1306 static SwsVector
*sws_getConvVec(SwsVector
*a
, SwsVector
*b
)
1308 int length
= a
->length
+ b
->length
- 1;
1310 SwsVector
*vec
= sws_getConstVec(0.0, length
);
1315 for (i
=0; i
<a
->length
; i
++) {
1316 for (j
=0; j
<b
->length
; j
++) {
1317 vec
->coeff
[i
+j
]+= a
->coeff
[i
]*b
->coeff
[j
];
1324 static SwsVector
*sws_sumVec(SwsVector
*a
, SwsVector
*b
)
1326 int length
= FFMAX(a
->length
, b
->length
);
1328 SwsVector
*vec
= sws_getConstVec(0.0, length
);
1333 for (i
=0; i
<a
->length
; i
++) vec
->coeff
[i
+ (length
-1)/2 - (a
->length
-1)/2]+= a
->coeff
[i
];
1334 for (i
=0; i
<b
->length
; i
++) vec
->coeff
[i
+ (length
-1)/2 - (b
->length
-1)/2]+= b
->coeff
[i
];
1339 static SwsVector
*sws_diffVec(SwsVector
*a
, SwsVector
*b
)
1341 int length
= FFMAX(a
->length
, b
->length
);
1343 SwsVector
*vec
= sws_getConstVec(0.0, length
);
1348 for (i
=0; i
<a
->length
; i
++) vec
->coeff
[i
+ (length
-1)/2 - (a
->length
-1)/2]+= a
->coeff
[i
];
1349 for (i
=0; i
<b
->length
; i
++) vec
->coeff
[i
+ (length
-1)/2 - (b
->length
-1)/2]-= b
->coeff
[i
];
1354 /* shift left / or right if "shift" is negative */
1355 static SwsVector
*sws_getShiftedVec(SwsVector
*a
, int shift
)
1357 int length
= a
->length
+ FFABS(shift
)*2;
1359 SwsVector
*vec
= sws_getConstVec(0.0, length
);
1364 for (i
=0; i
<a
->length
; i
++) {
1365 vec
->coeff
[i
+ (length
-1)/2 - (a
->length
-1)/2 - shift
]= a
->coeff
[i
];
1371 void sws_shiftVec(SwsVector
*a
, int shift
)
1373 SwsVector
*shifted
= sws_getShiftedVec(a
, shift
);
1375 a
->coeff
= shifted
->coeff
;
1376 a
->length
= shifted
->length
;
1380 void sws_addVec(SwsVector
*a
, SwsVector
*b
)
1382 SwsVector
*sum
= sws_sumVec(a
, b
);
1384 a
->coeff
= sum
->coeff
;
1385 a
->length
= sum
->length
;
1389 void sws_subVec(SwsVector
*a
, SwsVector
*b
)
1391 SwsVector
*diff
= sws_diffVec(a
, b
);
1393 a
->coeff
= diff
->coeff
;
1394 a
->length
= diff
->length
;
1398 void sws_convVec(SwsVector
*a
, SwsVector
*b
)
1400 SwsVector
*conv
= sws_getConvVec(a
, b
);
1402 a
->coeff
= conv
->coeff
;
1403 a
->length
= conv
->length
;
1407 SwsVector
*sws_cloneVec(SwsVector
*a
)
1410 SwsVector
*vec
= sws_allocVec(a
->length
);
1415 for (i
=0; i
<a
->length
; i
++) vec
->coeff
[i
]= a
->coeff
[i
];
1420 void sws_printVec2(SwsVector
*a
, AVClass
*log_ctx
, int log_level
)
1427 for (i
=0; i
<a
->length
; i
++)
1428 if (a
->coeff
[i
]>max
) max
= a
->coeff
[i
];
1430 for (i
=0; i
<a
->length
; i
++)
1431 if (a
->coeff
[i
]<min
) min
= a
->coeff
[i
];
1435 for (i
=0; i
<a
->length
; i
++) {
1436 int x
= (int)((a
->coeff
[i
]-min
)*60.0/range
+0.5);
1437 av_log(log_ctx
, log_level
, "%1.3f ", a
->coeff
[i
]);
1438 for (;x
>0; x
--) av_log(log_ctx
, log_level
, " ");
1439 av_log(log_ctx
, log_level
, "|\n");
1443 #if LIBSWSCALE_VERSION_MAJOR < 1
1444 void sws_printVec(SwsVector
*a
)
1446 sws_printVec2(a
, NULL
, AV_LOG_DEBUG
);
1450 void sws_freeVec(SwsVector
*a
)
1453 av_freep(&a
->coeff
);
1458 void sws_freeFilter(SwsFilter
*filter
)
1460 if (!filter
) return;
1462 if (filter
->lumH
) sws_freeVec(filter
->lumH
);
1463 if (filter
->lumV
) sws_freeVec(filter
->lumV
);
1464 if (filter
->chrH
) sws_freeVec(filter
->chrH
);
1465 if (filter
->chrV
) sws_freeVec(filter
->chrV
);
/*
 * Release the buffers owned by a scaling context: the per-line pixel
 * buffers (luma, chroma and, if present, alpha), the horizontal/vertical
 * filter coefficient tables and their position tables, the AltiVec
 * coefficient banks, the MMX2 filter code buffers and the YUV lookup table.
 * NOTE(review): several original lines (braces, guards, preprocessor
 * conditions) are not visible in this view; the comments below describe
 * only the statements that are.
 */
1469 void sws_freeContext(SwsContext
*c
)
/* Free each luma line buffer, then the array of line pointers. */
1475 for (i
=0; i
<c
->vLumBufSize
; i
++)
1476 av_freep(&c
->lumPixBuf
[i
]);
1477 av_freep(&c
->lumPixBuf
);
/* Same for the chroma line buffers. */
1481 for (i
=0; i
<c
->vChrBufSize
; i
++)
1482 av_freep(&c
->chrPixBuf
[i
]);
1483 av_freep(&c
->chrPixBuf
);
/* Alpha line buffers exist only when alpha support is built in and was
 * allocated; they are sized like the luma buffers. */
1486 if (CONFIG_SWSCALE_ALPHA
&& c
->alpPixBuf
) {
1487 for (i
=0; i
<c
->vLumBufSize
; i
++)
1488 av_freep(&c
->alpPixBuf
[i
]);
1489 av_freep(&c
->alpPixBuf
);
/* Filter coefficient tables. */
1492 av_freep(&c
->vLumFilter
);
1493 av_freep(&c
->vChrFilter
);
1494 av_freep(&c
->hLumFilter
);
1495 av_freep(&c
->hChrFilter
);
/* Vector coefficient banks (presumably inside an AltiVec-only #if that is
 * not visible here -- confirm). */
1497 av_freep(&c
->vYCoeffsBank
);
1498 av_freep(&c
->vCCoeffsBank
);
/* Filter position tables. */
1501 av_freep(&c
->vLumFilterPos
);
1502 av_freep(&c
->vChrFilterPos
);
1503 av_freep(&c
->hLumFilterPos
);
1504 av_freep(&c
->hChrFilterPos
);
/* The MMX2 filter code buffers are released with munmap() when
 * MAP_ANONYMOUS is defined, else with VirtualFree() when HAVE_VIRTUALALLOC
 * is set; the av_free() pair is presumably the #else fallback (the #else
 * line is not visible here). */
1507 #ifdef MAP_ANONYMOUS
1508 if (c
->lumMmx2FilterCode
) munmap(c
->lumMmx2FilterCode
, c
->lumMmx2FilterCodeSize
);
1509 if (c
->chrMmx2FilterCode
) munmap(c
->chrMmx2FilterCode
, c
->chrMmx2FilterCodeSize
);
1510 #elif HAVE_VIRTUALALLOC
1511 if (c
->lumMmx2FilterCode
) VirtualFree(c
->lumMmx2FilterCode
, 0, MEM_RELEASE
);
1512 if (c
->chrMmx2FilterCode
) VirtualFree(c
->chrMmx2FilterCode
, 0, MEM_RELEASE
);
1514 av_free(c
->lumMmx2FilterCode
);
1515 av_free(c
->chrMmx2FilterCode
);
/* Clear the code pointers so they do not dangle after release. */
1517 c
->lumMmx2FilterCode
=NULL
;
1518 c
->chrMmx2FilterCode
=NULL
;
1519 #endif /* ARCH_X86 */
/* YUV lookup table. */
1521 av_freep(&c
->yuvTable
);
1526 struct SwsContext
*sws_getCachedContext(struct SwsContext
*context
,
1527 int srcW
, int srcH
, enum PixelFormat srcFormat
,
1528 int dstW
, int dstH
, enum PixelFormat dstFormat
, int flags
,
1529 SwsFilter
*srcFilter
, SwsFilter
*dstFilter
, const double *param
)
1531 static const double default_param
[2] = {SWS_PARAM_DEFAULT
, SWS_PARAM_DEFAULT
};
1534 param
= default_param
;
1536 flags
= update_flags_cpu(flags
);
1539 (context
->srcW
!= srcW
||
1540 context
->srcH
!= srcH
||
1541 context
->srcFormat
!= srcFormat
||
1542 context
->dstW
!= dstW
||
1543 context
->dstH
!= dstH
||
1544 context
->dstFormat
!= dstFormat
||
1545 context
->flags
!= flags
||
1546 context
->param
[0] != param
[0] ||
1547 context
->param
[1] != param
[1])) {
1548 sws_freeContext(context
);
1553 if (!(context
= sws_alloc_context()))
1555 context
->srcW
= srcW
;
1556 context
->srcH
= srcH
;
1557 context
->srcRange
= handle_jpeg(&srcFormat
);
1558 context
->srcFormat
= srcFormat
;
1559 context
->dstW
= dstW
;
1560 context
->dstH
= dstH
;
1561 context
->dstRange
= handle_jpeg(&dstFormat
);
1562 context
->dstFormat
= dstFormat
;
1563 context
->flags
= flags
;
1564 context
->param
[0] = param
[0];
1565 context
->param
[1] = param
[1];
1566 sws_setColorspaceDetails(context
, ff_yuv2rgb_coeffs
[SWS_CS_DEFAULT
], context
->srcRange
, ff_yuv2rgb_coeffs
[SWS_CS_DEFAULT
] /* FIXME*/, context
->dstRange
, 0, 1<<16, 1<<16);
1567 if (sws_init_context(context
, srcFilter
, dstFilter
) < 0) {
1568 sws_freeContext(context
);