/*
 * Copyright (c) 2000, 2001 Fabrice Bellard
 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
 *
 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
#include <assert.h>
#include <string.h>

#include "libavutil/imgutils.h"
#include "libavutil/internal.h"
#include "avcodec.h"
#include "copy_block.h"
#include "dsputil.h"
#include "simple_idct.h"
#include "imgconvert.h"
#include "mathops.h"
#include "mpegvideo.h"
#include "config.h"
uint8_t  ff_cropTbl[256 + 2 * MAX_NEG_CROP] = { 0, };
uint32_t ff_squareTbl[512] = { 0, };
#define BIT_DEPTH 9
#include "dsputil_template.c"
#undef BIT_DEPTH

#define BIT_DEPTH 10
#include "dsputil_template.c"
#undef BIT_DEPTH

#define BIT_DEPTH 8
#include "dsputil_template.c"
// 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f or whatever, depending on the CPU's
// native arithmetic size
#define pb_7f (~0UL / 255 * 0x7f)
#define pb_80 (~0UL / 255 * 0x80)
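
/* ~0UL / 255 replicates 0x01 into every byte of a long (0xFFFFFFFF / 255 =
 * 0x01010101 on a 32-bit machine), so pb_7f and pb_80 hold 0x7f and 0x80 in
 * every byte lane. add_bytes_c() and diff_bytes_c() below use them to
 * process sizeof(long) pixels per operation. */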
const uint8_t ff_zigzag_direct[64] = {
     0,  1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
};
/* Specific zigzag scan for 248 idct. NOTE that unlike the
 * specification, we interleave the fields */
const uint8_t ff_zigzag248_direct[64] = {
     0,  8,  1,  9, 16, 24,  2, 10,
    17, 25, 32, 40, 48, 56, 33, 41,
    18, 26,  3, 11,  4, 12, 19, 27,
    34, 42, 49, 57, 50, 58, 35, 43,
    20, 28,  5, 13,  6, 14, 21, 29,
    36, 44, 51, 59, 52, 60, 37, 45,
    22, 30,  7, 15, 23, 31, 38, 46,
    53, 61, 54, 62, 39, 47, 55, 63,
};
/* Not permuted inverse zigzag_direct + 1 for the MMX quantizer */
DECLARE_ALIGNED(16, uint16_t, ff_inv_zigzag_direct16)[64];
const uint8_t ff_alternate_horizontal_scan[64] = {
     0,  1,  2,  3,  8,  9, 16, 17,
    10, 11,  4,  5,  6,  7, 15, 14,
    13, 12, 19, 18, 24, 25, 32, 33,
    26, 27, 20, 21, 22, 23, 28, 29,
    30, 31, 34, 35, 40, 41, 48, 49,
    42, 43, 36, 37, 38, 39, 44, 45,
    46, 47, 50, 51, 56, 57, 58, 59,
    52, 53, 54, 55, 60, 61, 62, 63,
};
const uint8_t ff_alternate_vertical_scan[64] = {
     0,  8, 16, 24,  1,  9,  2, 10,
    17, 25, 32, 40, 48, 56, 57, 49,
    41, 33, 26, 18,  3, 11,  4, 12,
    19, 27, 34, 42, 50, 58, 35, 43,
    51, 59, 20, 28,  5, 13,  6, 14,
    21, 29, 36, 44, 52, 60, 37, 45,
    53, 61, 22, 30,  7, 15, 23, 31,
    38, 46, 54, 62, 39, 47, 55, 63,
};
/* Input permutation for the simple_idct_mmx */
static const uint8_t simple_mmx_permutation[64] = {
    0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
    0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
    0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
    0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
    0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
    0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
    0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
    0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};
static const uint8_t idct_sse2_row_perm[8] = { 0, 4, 1, 5, 2, 6, 3, 7 };
void ff_init_scantable(uint8_t *permutation, ScanTable *st,
                       const uint8_t *src_scantable)
{
    int i, end;

    st->scantable = src_scantable;

    for (i = 0; i < 64; i++) {
        int j;
        j = src_scantable[i];
        st->permutated[i] = permutation[j];
    }

    end = -1;
    for (i = 0; i < 64; i++) {
        int j;
        j = st->permutated[i];
        if (j > end)
            end = j;
        st->raster_end[i] = end;
    }
}
void ff_init_scantable_permutation(uint8_t *idct_permutation,
                                   int idct_permutation_type)
{
    int i;

    switch (idct_permutation_type) {
    case FF_NO_IDCT_PERM:
        for (i = 0; i < 64; i++)
            idct_permutation[i] = i;
        break;
    case FF_LIBMPEG2_IDCT_PERM:
        for (i = 0; i < 64; i++)
            idct_permutation[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
        break;
    case FF_SIMPLE_IDCT_PERM:
        for (i = 0; i < 64; i++)
            idct_permutation[i] = simple_mmx_permutation[i];
        break;
    case FF_TRANSPOSE_IDCT_PERM:
        for (i = 0; i < 64; i++)
            idct_permutation[i] = ((i & 7) << 3) | (i >> 3);
        break;
    case FF_PARTTRANS_IDCT_PERM:
        for (i = 0; i < 64; i++)
            idct_permutation[i] = (i & 0x24) | ((i & 3) << 3) | ((i >> 3) & 3);
        break;
    case FF_SSE2_IDCT_PERM:
        for (i = 0; i < 64; i++)
            idct_permutation[i] = (i & 0x38) | idct_sse2_row_perm[i & 7];
        break;
    default:
        av_log(NULL, AV_LOG_ERROR,
               "Internal error, IDCT permutation not set\n");
    }
}
static int pix_sum_c(uint8_t *pix, int line_size)
{
    int s = 0, i, j;

    for (i = 0; i < 16; i++) {
        for (j = 0; j < 16; j += 8) {
            s   += pix[0];
            s   += pix[1];
            s   += pix[2];
            s   += pix[3];
            s   += pix[4];
            s   += pix[5];
            s   += pix[6];
            s   += pix[7];
            pix += 8;
        }
        pix += line_size - 16;
    }
    return s;
}
static int pix_norm1_c(uint8_t *pix, int line_size)
{
    int s = 0, i, j;
    uint32_t *sq = ff_squareTbl + 256;

    for (i = 0; i < 16; i++) {
        for (j = 0; j < 16; j += 8) {
#if HAVE_FAST_64BIT
            register uint64_t x = *(uint64_t *) pix;
            s += sq[x         & 0xff];
            s += sq[(x >>  8) & 0xff];
            s += sq[(x >> 16) & 0xff];
            s += sq[(x >> 24) & 0xff];
            s += sq[(x >> 32) & 0xff];
            s += sq[(x >> 40) & 0xff];
            s += sq[(x >> 48) & 0xff];
            s += sq[(x >> 56) & 0xff];
#else
            register uint32_t x = *(uint32_t *) pix;
            s += sq[x         & 0xff];
            s += sq[(x >>  8) & 0xff];
            s += sq[(x >> 16) & 0xff];
            s += sq[(x >> 24) & 0xff];
            x  = *(uint32_t *) (pix + 4);
            s += sq[x         & 0xff];
            s += sq[(x >>  8) & 0xff];
            s += sq[(x >> 16) & 0xff];
            s += sq[(x >> 24) & 0xff];
#endif
            pix += 8;
        }
        pix += line_size - 16;
    }
    return s;
}
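
/* Note: sq = ff_squareTbl + 256 points at the centre of the 512-entry
 * square table, so sq[d] is defined for any pixel difference d in
 * [-255, 255]; pix_norm1_c above and the sse*_c functions below rely on
 * this to square bytes and byte differences with a single lookup. */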
static void bswap_buf(uint32_t *dst, const uint32_t *src, int w)
{
    int i;

    for (i = 0; i + 8 <= w; i += 8) {
        dst[i + 0] = av_bswap32(src[i + 0]);
        dst[i + 1] = av_bswap32(src[i + 1]);
        dst[i + 2] = av_bswap32(src[i + 2]);
        dst[i + 3] = av_bswap32(src[i + 3]);
        dst[i + 4] = av_bswap32(src[i + 4]);
        dst[i + 5] = av_bswap32(src[i + 5]);
        dst[i + 6] = av_bswap32(src[i + 6]);
        dst[i + 7] = av_bswap32(src[i + 7]);
    }
    for (; i < w; i++)
        dst[i + 0] = av_bswap32(src[i + 0]);
}
static void bswap16_buf(uint16_t *dst, const uint16_t *src, int len)
{
    while (len--)
        *dst++ = av_bswap16(*src++);
}
static int sse4_c(void *v, uint8_t *pix1, uint8_t *pix2,
                  int line_size, int h)
{
    int s = 0, i;
    uint32_t *sq = ff_squareTbl + 256;

    for (i = 0; i < h; i++) {
        s    += sq[pix1[0] - pix2[0]];
        s    += sq[pix1[1] - pix2[1]];
        s    += sq[pix1[2] - pix2[2]];
        s    += sq[pix1[3] - pix2[3]];
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}
static int sse8_c(void *v, uint8_t *pix1, uint8_t *pix2,
                  int line_size, int h)
{
    int s = 0, i;
    uint32_t *sq = ff_squareTbl + 256;

    for (i = 0; i < h; i++) {
        s    += sq[pix1[0] - pix2[0]];
        s    += sq[pix1[1] - pix2[1]];
        s    += sq[pix1[2] - pix2[2]];
        s    += sq[pix1[3] - pix2[3]];
        s    += sq[pix1[4] - pix2[4]];
        s    += sq[pix1[5] - pix2[5]];
        s    += sq[pix1[6] - pix2[6]];
        s    += sq[pix1[7] - pix2[7]];
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}
static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2,
                   int line_size, int h)
{
    int s = 0, i;
    uint32_t *sq = ff_squareTbl + 256;

    for (i = 0; i < h; i++) {
        s    += sq[pix1[ 0] - pix2[ 0]];
        s    += sq[pix1[ 1] - pix2[ 1]];
        s    += sq[pix1[ 2] - pix2[ 2]];
        s    += sq[pix1[ 3] - pix2[ 3]];
        s    += sq[pix1[ 4] - pix2[ 4]];
        s    += sq[pix1[ 5] - pix2[ 5]];
        s    += sq[pix1[ 6] - pix2[ 6]];
        s    += sq[pix1[ 7] - pix2[ 7]];
        s    += sq[pix1[ 8] - pix2[ 8]];
        s    += sq[pix1[ 9] - pix2[ 9]];
        s    += sq[pix1[10] - pix2[10]];
        s    += sq[pix1[11] - pix2[11]];
        s    += sq[pix1[12] - pix2[12]];
        s    += sq[pix1[13] - pix2[13]];
        s    += sq[pix1[14] - pix2[14]];
        s    += sq[pix1[15] - pix2[15]];
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}
static void diff_pixels_c(int16_t *restrict block, const uint8_t *s1,
                          const uint8_t *s2, int stride)
{
    int i;

    /* read the pixels */
    for (i = 0; i < 8; i++) {
        block[0] = s1[0] - s2[0];
        block[1] = s1[1] - s2[1];
        block[2] = s1[2] - s2[2];
        block[3] = s1[3] - s2[3];
        block[4] = s1[4] - s2[4];
        block[5] = s1[5] - s2[5];
        block[6] = s1[6] - s2[6];
        block[7] = s1[7] - s2[7];
        s1      += stride;
        s2      += stride;
        block   += 8;
    }
}
static void put_pixels_clamped_c(const int16_t *block, uint8_t *restrict pixels,
                                 int line_size)
{
    int i;

    /* read the pixels */
    for (i = 0; i < 8; i++) {
        pixels[0] = av_clip_uint8(block[0]);
        pixels[1] = av_clip_uint8(block[1]);
        pixels[2] = av_clip_uint8(block[2]);
        pixels[3] = av_clip_uint8(block[3]);
        pixels[4] = av_clip_uint8(block[4]);
        pixels[5] = av_clip_uint8(block[5]);
        pixels[6] = av_clip_uint8(block[6]);
        pixels[7] = av_clip_uint8(block[7]);
        pixels   += line_size;
        block    += 8;
    }
}
static void put_signed_pixels_clamped_c(const int16_t *block,
                                        uint8_t *restrict pixels,
                                        int line_size)
{
    int i, j;

    for (i = 0; i < 8; i++) {
        for (j = 0; j < 8; j++) {
            if (*block < -128)
                *pixels = 0;
            else if (*block > 127)
                *pixels = 255;
            else
                *pixels = (uint8_t)(*block + 128);
            block++;
            pixels++;
        }
        pixels += (line_size - 8);
    }
}
static void add_pixels8_c(uint8_t *restrict pixels, int16_t *block,
                          int line_size)
{
    int i;

    for (i = 0; i < 8; i++) {
        pixels[0] += block[0];
        pixels[1] += block[1];
        pixels[2] += block[2];
        pixels[3] += block[3];
        pixels[4] += block[4];
        pixels[5] += block[5];
        pixels[6] += block[6];
        pixels[7] += block[7];
        pixels    += line_size;
        block     += 8;
    }
}
static void add_pixels_clamped_c(const int16_t *block, uint8_t *restrict pixels,
                                 int line_size)
{
    int i;

    /* read the pixels */
    for (i = 0; i < 8; i++) {
        pixels[0] = av_clip_uint8(pixels[0] + block[0]);
        pixels[1] = av_clip_uint8(pixels[1] + block[1]);
        pixels[2] = av_clip_uint8(pixels[2] + block[2]);
        pixels[3] = av_clip_uint8(pixels[3] + block[3]);
        pixels[4] = av_clip_uint8(pixels[4] + block[4]);
        pixels[5] = av_clip_uint8(pixels[5] + block[5]);
        pixels[6] = av_clip_uint8(pixels[6] + block[6]);
        pixels[7] = av_clip_uint8(pixels[7] + block[7]);
        pixels   += line_size;
        block    += 8;
    }
}
static int sum_abs_dctelem_c(int16_t *block)
{
    int sum = 0, i;

    for (i = 0; i < 64; i++)
        sum += FFABS(block[i]);
    return sum;
}
static void fill_block16_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    int i;

    for (i = 0; i < h; i++) {
        memset(block, value, 16);
        block += line_size;
    }
}
static void fill_block8_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    int i;

    for (i = 0; i < h; i++) {
        memset(block, value, 8);
        block += line_size;
    }
}
#define avg2(a, b)       (((a) + (b) + 1) >> 1)
#define avg4(a, b, c, d) (((a) + (b) + (c) + (d) + 2) >> 2)
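
/* Both averages round upward: avg2(a, b) = (a + b + 1) >> 1, so ties go up,
 * and avg4 adds 2 before shifting for the same effect over four samples,
 * matching the MPEG half-pel rounding rules for the rnd case. */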
static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h,
                   int x16, int y16, int rounder)
{
    const int A = (16 - x16) * (16 - y16);
    const int B = (x16)      * (16 - y16);
    const int C = (16 - x16) * (y16);
    const int D = (x16)      * (y16);
    int i;

    for (i = 0; i < h; i++) {
        dst[0] = (A * src[0] + B * src[1] + C * src[stride + 0] + D * src[stride + 1] + rounder) >> 8;
        dst[1] = (A * src[1] + B * src[2] + C * src[stride + 1] + D * src[stride + 2] + rounder) >> 8;
        dst[2] = (A * src[2] + B * src[3] + C * src[stride + 2] + D * src[stride + 3] + rounder) >> 8;
        dst[3] = (A * src[3] + B * src[4] + C * src[stride + 3] + D * src[stride + 4] + rounder) >> 8;
        dst[4] = (A * src[4] + B * src[5] + C * src[stride + 4] + D * src[stride + 5] + rounder) >> 8;
        dst[5] = (A * src[5] + B * src[6] + C * src[stride + 5] + D * src[stride + 6] + rounder) >> 8;
        dst[6] = (A * src[6] + B * src[7] + C * src[stride + 6] + D * src[stride + 7] + rounder) >> 8;
        dst[7] = (A * src[7] + B * src[8] + C * src[stride + 7] + D * src[stride + 8] + rounder) >> 8;
        dst   += stride;
        src   += stride;
    }
}
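
/* The four bilinear weights always satisfy A + B + C + D = 16 * 16 = 256,
 * so the final >> 8 renormalizes the sum; rounder supplies the rounding
 * constant. */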
void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
              int dxx, int dxy, int dyx, int dyy, int shift, int r,
              int width, int height)
{
    int y, vx, vy;
    const int s = 1 << shift;

    width--;
    height--;

    for (y = 0; y < h; y++) {
        int x;

        vx = ox;
        vy = oy;
        for (x = 0; x < 8; x++) { // XXX FIXME optimize
            int src_x, src_y, frac_x, frac_y, index;

            src_x  = vx >> 16;
            src_y  = vy >> 16;
            frac_x = src_x & (s - 1);
            frac_y = src_y & (s - 1);
            src_x >>= shift;
            src_y >>= shift;

            if ((unsigned) src_x < width) {
                if ((unsigned) src_y < height) {
                    index = src_x + src_y * stride;
                    dst[y * stride + x] =
                        ((src[index]              * (s - frac_x) +
                          src[index + 1]          * frac_x) * (s - frac_y) +
                         (src[index + stride]     * (s - frac_x) +
                          src[index + stride + 1] * frac_x) * frac_y +
                         r) >> (shift * 2);
                } else {
                    index = src_x + av_clip(src_y, 0, height) * stride;
                    dst[y * stride + x] =
                        ((src[index]     * (s - frac_x) +
                          src[index + 1] * frac_x) * s +
                         r) >> (shift * 2);
                }
            } else {
                if ((unsigned) src_y < height) {
                    index = av_clip(src_x, 0, width) + src_y * stride;
                    dst[y * stride + x] =
                        ((src[index]          * (s - frac_y) +
                          src[index + stride] * frac_y) * s +
                         r) >> (shift * 2);
                } else {
                    index = av_clip(src_x, 0, width) +
                            av_clip(src_y, 0, height) * stride;
                    dst[y * stride + x] = src[index];
                }
            }

            vx += dxx;
            vy += dyx;
        }
        ox += dxy;
        oy += dyy;
    }
}
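
/* vx/vy are 16.16 fixed-point accumulators stepped by the affine matrix
 * (dxx, dxy; dyx, dyy): vx >> 16 gives the source coordinate in
 * 1/(1 << shift) pel units, from which frac_x/frac_y take the sub-pel part.
 * The clipped branches replicate edge pixels for out-of-picture samples. */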
static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    switch (width) {
    case 2:  put_pixels2_8_c(dst, src, stride, height);  break;
    case 4:  put_pixels4_8_c(dst, src, stride, height);  break;
    case 8:  put_pixels8_8_c(dst, src, stride, height);  break;
    case 16: put_pixels16_8_c(dst, src, stride, height); break;
    }
}

static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int i, j;

    for (i = 0; i < height; i++) {
        for (j = 0; j < width; j++)
            dst[j] = (683 * (2 * src[j] + src[j + 1] + 1)) >> 11;
        src += stride;
        dst += stride;
    }
}

static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int i, j;

    for (i = 0; i < height; i++) {
        for (j = 0; j < width; j++)
            dst[j] = (683 * (src[j] + 2 * src[j + 1] + 1)) >> 11;
        src += stride;
        dst += stride;
    }
}

static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int i, j;

    for (i = 0; i < height; i++) {
        for (j = 0; j < width; j++)
            dst[j] = (683 * (2 * src[j] + src[j + stride] + 1)) >> 11;
        src += stride;
        dst += stride;
    }
}

static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int i, j;

    for (i = 0; i < height; i++) {
        for (j = 0; j < width; j++)
            dst[j] = (2731 * (4 * src[j] + 3 * src[j + 1] +
                              3 * src[j + stride] + 2 * src[j + stride + 1] +
                              6)) >> 15;
        src += stride;
        dst += stride;
    }
}

static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int i, j;

    for (i = 0; i < height; i++) {
        for (j = 0; j < width; j++)
            dst[j] = (2731 * (3 * src[j] + 2 * src[j + 1] +
                              4 * src[j + stride] + 3 * src[j + stride + 1] +
                              6)) >> 15;
        src += stride;
        dst += stride;
    }
}

static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int i, j;

    for (i = 0; i < height; i++) {
        for (j = 0; j < width; j++)
            dst[j] = (683 * (src[j] + 2 * src[j + stride] + 1)) >> 11;
        src += stride;
        dst += stride;
    }
}

static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int i, j;

    for (i = 0; i < height; i++) {
        for (j = 0; j < width; j++)
            dst[j] = (2731 * (3 * src[j] + 4 * src[j + 1] +
                              2 * src[j + stride] + 3 * src[j + stride + 1] +
                              6)) >> 15;
        src += stride;
        dst += stride;
    }
}

static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int i, j;

    for (i = 0; i < height; i++) {
        for (j = 0; j < width; j++)
            dst[j] = (2731 * (2 * src[j] + 3 * src[j + 1] +
                              3 * src[j + stride] + 4 * src[j + stride + 1] +
                              6)) >> 15;
        src += stride;
        dst += stride;
    }
}
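
/* 683 = round(2^11 / 3) and 2731 = round(2^15 / 12), so e.g.
 * (683 * (2 * a + b + 1)) >> 11 evaluates (2 * a + b) / 3 with rounding:
 * exact thirdpel interpolation in fixed point (used by the SVQ3 decoder).
 * The avg_ variants below additionally average with the existing dst. */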
static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    switch (width) {
    case 2:  avg_pixels2_8_c(dst, src, stride, height);  break;
    case 4:  avg_pixels4_8_c(dst, src, stride, height);  break;
    case 8:  avg_pixels8_8_c(dst, src, stride, height);  break;
    case 16: avg_pixels16_8_c(dst, src, stride, height); break;
    }
}

static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int i, j;

    for (i = 0; i < height; i++) {
        for (j = 0; j < width; j++)
            dst[j] = (dst[j] +
                      ((683 * (2 * src[j] + src[j + 1] + 1)) >> 11) + 1) >> 1;
        src += stride;
        dst += stride;
    }
}

static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int i, j;

    for (i = 0; i < height; i++) {
        for (j = 0; j < width; j++)
            dst[j] = (dst[j] +
                      ((683 * (src[j] + 2 * src[j + 1] + 1)) >> 11) + 1) >> 1;
        src += stride;
        dst += stride;
    }
}

static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int i, j;

    for (i = 0; i < height; i++) {
        for (j = 0; j < width; j++)
            dst[j] = (dst[j] +
                      ((683 * (2 * src[j] + src[j + stride] + 1)) >> 11) + 1) >> 1;
        src += stride;
        dst += stride;
    }
}

static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int i, j;

    for (i = 0; i < height; i++) {
        for (j = 0; j < width; j++)
            dst[j] = (dst[j] +
                      ((2731 * (4 * src[j] + 3 * src[j + 1] +
                                3 * src[j + stride] + 2 * src[j + stride + 1] +
                                6)) >> 15) + 1) >> 1;
        src += stride;
        dst += stride;
    }
}

static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int i, j;

    for (i = 0; i < height; i++) {
        for (j = 0; j < width; j++)
            dst[j] = (dst[j] +
                      ((2731 * (3 * src[j] + 2 * src[j + 1] +
                                4 * src[j + stride] + 3 * src[j + stride + 1] +
                                6)) >> 15) + 1) >> 1;
        src += stride;
        dst += stride;
    }
}

static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int i, j;

    for (i = 0; i < height; i++) {
        for (j = 0; j < width; j++)
            dst[j] = (dst[j] +
                      ((683 * (src[j] + 2 * src[j + stride] + 1)) >> 11) + 1) >> 1;
        src += stride;
        dst += stride;
    }
}

static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int i, j;

    for (i = 0; i < height; i++) {
        for (j = 0; j < width; j++)
            dst[j] = (dst[j] +
                      ((2731 * (3 * src[j] + 4 * src[j + 1] +
                                2 * src[j + stride] + 3 * src[j + stride + 1] +
                                6)) >> 15) + 1) >> 1;
        src += stride;
        dst += stride;
    }
}

static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int i, j;

    for (i = 0; i < height; i++) {
        for (j = 0; j < width; j++)
            dst[j] = (dst[j] +
                      ((2731 * (2 * src[j] + 3 * src[j + 1] +
                                3 * src[j + stride] + 4 * src[j + stride + 1] +
                                6)) >> 15) + 1) >> 1;
        src += stride;
        dst += stride;
    }
}
#define QPEL_MC(r, OPNAME, RND, OP) \
static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<h; i++)\
    {\
        OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\
        OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\
        OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\
        OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\
        OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\
        OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\
        OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\
        OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int w=8;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<w; i++)\
    {\
        const int src0= src[0*srcStride];\
        const int src1= src[1*srcStride];\
        const int src2= src[2*srcStride];\
        const int src3= src[3*srcStride];\
        const int src4= src[4*srcStride];\
        const int src5= src[5*srcStride];\
        const int src6= src[6*srcStride];\
        const int src7= src[7*srcStride];\
        const int src8= src[8*srcStride];\
        OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
        OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
        OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
        OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
        OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
        OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
        OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
        dst++;\
        src++;\
    }\
}\
static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<h; i++)\
    {\
        OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\
        OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\
        OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\
        OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\
        OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\
        OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\
        OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\
        OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\
        OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\
        OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\
        OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\
        OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\
        OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\
        OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\
        OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\
        OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    const int w=16;\
    int i;\
    for(i=0; i<w; i++)\
    {\
        const int src0= src[0*srcStride];\
        const int src1= src[1*srcStride];\
        const int src2= src[2*srcStride];\
        const int src3= src[3*srcStride];\
        const int src4= src[4*srcStride];\
        const int src5= src[5*srcStride];\
        const int src6= src[6*srcStride];\
        const int src7= src[7*srcStride];\
        const int src8= src[8*srcStride];\
        const int src9= src[9*srcStride];\
        const int src10= src[10*srcStride];\
        const int src11= src[11*srcStride];\
        const int src12= src[12*srcStride];\
        const int src13= src[13*srcStride];\
        const int src14= src[14*srcStride];\
        const int src15= src[15*srcStride];\
        const int src16= src[16*srcStride];\
        OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
        OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
        OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
        OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
        OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
        OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
        OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
        OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
        OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
        OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
        OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
        OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
        OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
        OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
        OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
        OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
        dst++;\
        src++;\
    }\
}\
static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[64];\
    put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
    OPNAME ## pixels8_l2_8(dst, src, half, stride, stride, 8, 8);\
}\
static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
}\
static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[64];\
    put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
    OPNAME ## pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);\
}\
static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t half[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
    OPNAME ## pixels8_l2_8(dst, full, half, stride, 16, 8, 8);\
}\
static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    copy_block9(full, src, 16, stride, 9);\
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
}\
static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t half[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
    OPNAME ## pixels8_l2_8(dst, full+16, half, stride, 16, 8, 8);\
}\
void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfV[64];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l4_8(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
}\
void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfV[64];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
}\
void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfV[64];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l4_8(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
}\
void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfV[64];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full  , 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l4_8(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t halfH[72];\
    uint8_t halfHV[64];\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t halfH[72];\
    uint8_t halfHV[64];\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
}\
void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfV[64];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
}\
void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfV[64];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
}\
static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t halfH[72];\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
}\
static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[256];\
    put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
    OPNAME ## pixels16_l2_8(dst, src, half, stride, stride, 16, 16);\
}\
static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
}\
static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[256];\
    put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
    OPNAME ## pixels16_l2_8(dst, src+1, half, stride, stride, 16, 16);\
}\
static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t half[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
    OPNAME ## pixels16_l2_8(dst, full, half, stride, 24, 16, 16);\
}\
static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    copy_block17(full, src, 24, stride, 17);\
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
}\
static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t half[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
    OPNAME ## pixels16_l2_8(dst, full+24, half, stride, 24, 16, 16);\
}\
void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfV[256];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l4_8(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
}\
void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfV[256];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
}\
void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfV[256];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l4_8(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
}\
void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfV[256];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full  , 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l4_8(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t halfH[272];\
    uint8_t halfHV[256];\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t halfH[272];\
    uint8_t halfHV[256];\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
}\
void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfV[256];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
}\
void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfV[256];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
}\
static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t halfH[272];\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
}
#define op_avg(a, b)        a = (((a) + cm[((b) + 16) >> 5] + 1) >> 1)
#define op_avg_no_rnd(a, b) a = (((a) + cm[((b) + 15) >> 5]) >> 1)
#define op_put(a, b)        a = cm[((b) + 16) >> 5]
#define op_put_no_rnd(a, b) a = cm[((b) + 15) >> 5]
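
/* Each OP receives the raw lowpass sum; the taps (20, -6, 3, -1) applied to
 * both neighbours sum to 32, so cm[((b) + 16) >> 5] divides by 32 with
 * rounding (15 for the no_rnd variants) and clamps via the crop table,
 * which tolerates out-of-range indices up to MAX_NEG_CROP on either side. */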
QPEL_MC(0, put_,        _,        op_put)
QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
QPEL_MC(0, avg_,        _,        op_avg)
//QPEL_MC(1, avg_no_rnd , _ , op_avg)
#undef op_avg
#undef op_avg_no_rnd
#undef op_put
#undef op_put_no_rnd
#define put_qpel8_mc00_c         ff_put_pixels8x8_c
#define avg_qpel8_mc00_c         ff_avg_pixels8x8_c
#define put_qpel16_mc00_c        ff_put_pixels16x16_c
#define avg_qpel16_mc00_c        ff_avg_pixels16x16_c
#define put_no_rnd_qpel8_mc00_c  ff_put_pixels8x8_c
#define put_no_rnd_qpel16_mc00_c ff_put_pixels16x16_c
static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src,
                                  int dstStride, int srcStride, int h)
{
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
    int i;

    for (i = 0; i < h; i++) {
        dst[0] = cm[(9 * (src[0] + src[1]) - (src[-1] + src[2]) + 8) >> 4];
        dst[1] = cm[(9 * (src[1] + src[2]) - (src[ 0] + src[3]) + 8) >> 4];
        dst[2] = cm[(9 * (src[2] + src[3]) - (src[ 1] + src[4]) + 8) >> 4];
        dst[3] = cm[(9 * (src[3] + src[4]) - (src[ 2] + src[5]) + 8) >> 4];
        dst[4] = cm[(9 * (src[4] + src[5]) - (src[ 3] + src[6]) + 8) >> 4];
        dst[5] = cm[(9 * (src[5] + src[6]) - (src[ 4] + src[7]) + 8) >> 4];
        dst[6] = cm[(9 * (src[6] + src[7]) - (src[ 5] + src[8]) + 8) >> 4];
        dst[7] = cm[(9 * (src[7] + src[8]) - (src[ 6] + src[9]) + 8) >> 4];
        dst   += dstStride;
        src   += srcStride;
    }
}
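
/* WMV2 half-pel filter: weights (-1, 9, 9, -1) sum to 16, so the + 8 and
 * >> 4 divide with rounding; cm clamps the result to 0..255. */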
#if CONFIG_RV40_DECODER
void ff_put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride)
{
    put_pixels16_xy2_8_c(dst, src, stride, 16);
}

void ff_avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride)
{
    avg_pixels16_xy2_8_c(dst, src, stride, 16);
}

void ff_put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride)
{
    put_pixels8_xy2_8_c(dst, src, stride, 8);
}

void ff_avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride)
{
    avg_pixels8_xy2_8_c(dst, src, stride, 8);
}
#endif /* CONFIG_RV40_DECODER */
static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src,
                                  int dstStride, int srcStride, int w)
{
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
    int i;

    for (i = 0; i < w; i++) {
        const int src_1 = src[-srcStride];
        const int src0  = src[0];
        const int src1  = src[srcStride];
        const int src2  = src[2 * srcStride];
        const int src3  = src[3 * srcStride];
        const int src4  = src[4 * srcStride];
        const int src5  = src[5 * srcStride];
        const int src6  = src[6 * srcStride];
        const int src7  = src[7 * srcStride];
        const int src8  = src[8 * srcStride];
        const int src9  = src[9 * srcStride];
        dst[0 * dstStride] = cm[(9 * (src0 + src1) - (src_1 + src2) + 8) >> 4];
        dst[1 * dstStride] = cm[(9 * (src1 + src2) - (src0  + src3) + 8) >> 4];
        dst[2 * dstStride] = cm[(9 * (src2 + src3) - (src1  + src4) + 8) >> 4];
        dst[3 * dstStride] = cm[(9 * (src3 + src4) - (src2  + src5) + 8) >> 4];
        dst[4 * dstStride] = cm[(9 * (src4 + src5) - (src3  + src6) + 8) >> 4];
        dst[5 * dstStride] = cm[(9 * (src5 + src6) - (src4  + src7) + 8) >> 4];
        dst[6 * dstStride] = cm[(9 * (src6 + src7) - (src5  + src8) + 8) >> 4];
        dst[7 * dstStride] = cm[(9 * (src7 + src8) - (src6  + src9) + 8) >> 4];
        src++;
        dst++;
    }
}
static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t half[64];

    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
    put_pixels8_l2_8(dst, src, half, stride, stride, 8, 8);
}

static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride)
{
    wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
}

static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t half[64];

    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
    put_pixels8_l2_8(dst, src + 1, half, stride, stride, 8, 8);
}

static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride)
{
    wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
}

static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t halfH[88];
    uint8_t halfV[64];
    uint8_t halfHV[64];

    wmv2_mspel8_h_lowpass(halfH, src - stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(halfV, src, 8, stride, 8);
    wmv2_mspel8_v_lowpass(halfHV, halfH + 8, 8, 8, 8);
    put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);
}

static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t halfH[88];
    uint8_t halfV[64];
    uint8_t halfHV[64];

    wmv2_mspel8_h_lowpass(halfH, src - stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(halfV, src + 1, 8, stride, 8);
    wmv2_mspel8_v_lowpass(halfHV, halfH + 8, 8, 8, 8);
    put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);
}

static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t halfH[88];

    wmv2_mspel8_h_lowpass(halfH, src - stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(dst, halfH + 8, stride, 8, 8);
}
static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale)
{
    if (CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
        int x;
        const int strength = ff_h263_loop_filter_strength[qscale];

        for (x = 0; x < 8; x++) {
            int d1, d2, ad1;
            int p0 = src[x - 2 * stride];
            int p1 = src[x - 1 * stride];
            int p2 = src[x + 0 * stride];
            int p3 = src[x + 1 * stride];
            int d  = (p0 - p3 + 4 * (p2 - p1)) / 8;

            if (d < -2 * strength)
                d1 = 0;
            else if (d < -strength)
                d1 = -2 * strength - d;
            else if (d < strength)
                d1 = d;
            else if (d < 2 * strength)
                d1 = 2 * strength - d;
            else
                d1 = 0;

            p1 += d1;
            p2 -= d1;
            if (p1 & 256)
                p1 = ~(p1 >> 31);
            if (p2 & 256)
                p2 = ~(p2 >> 31);

            src[x - 1 * stride] = p1;
            src[x + 0 * stride] = p2;

            ad1 = FFABS(d1) >> 1;

            d2 = av_clip((p0 - p3) / 4, -ad1, ad1);

            src[x - 2 * stride] = p0 - d2;
            src[x + stride]     = p3 + d2;
        }
    }
}
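
/* d1 implements a deadzone: small d passes through (filtering likely
 * blocking noise), larger d is ramped back down, and |d| >= 2*strength is
 * left untouched as a genuine edge. The (p & 256) test catches both p < 0
 * and p > 255 in one branch; ~(p >> 31) then resolves to 0 for negative
 * values and 255 for overflows. */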
static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale)
{
    if (CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
        int y;
        const int strength = ff_h263_loop_filter_strength[qscale];

        for (y = 0; y < 8; y++) {
            int d1, d2, ad1;
            int p0 = src[y * stride - 2];
            int p1 = src[y * stride - 1];
            int p2 = src[y * stride + 0];
            int p3 = src[y * stride + 1];
            int d  = (p0 - p3 + 4 * (p2 - p1)) / 8;

            if (d < -2 * strength)
                d1 = 0;
            else if (d < -strength)
                d1 = -2 * strength - d;
            else if (d < strength)
                d1 = d;
            else if (d < 2 * strength)
                d1 = 2 * strength - d;
            else
                d1 = 0;

            p1 += d1;
            p2 -= d1;
            if (p1 & 256)
                p1 = ~(p1 >> 31);
            if (p2 & 256)
                p2 = ~(p2 >> 31);

            src[y * stride - 1] = p1;
            src[y * stride + 0] = p2;

            ad1 = FFABS(d1) >> 1;

            d2 = av_clip((p0 - p3) / 4, -ad1, ad1);

            src[y * stride - 2] = p0 - d2;
            src[y * stride + 1] = p3 + d2;
        }
    }
}
static void h261_loop_filter_c(uint8_t *src, int stride)
{
    int x, y, xy, yz;
    int temp[64];

    for (x = 0; x < 8; x++) {
        temp[x]         = 4 * src[x];
        temp[x + 7 * 8] = 4 * src[x + 7 * stride];
    }
    for (y = 1; y < 7; y++) {
        for (x = 0; x < 8; x++) {
            xy       = y * stride + x;
            yz       = y * 8      + x;
            temp[yz] = src[xy - stride] + 2 * src[xy] + src[xy + stride];
        }
    }

    for (y = 0; y < 8; y++) {
        src[y * stride]     = (temp[y * 8]     + 2) >> 2;
        src[7 + y * stride] = (temp[7 + y * 8] + 2) >> 2;
        for (x = 1; x < 7; x++) {
            xy      = y * stride + x;
            yz      = y * 8      + x;
            src[xy] = (temp[yz - 1] + 2 * temp[yz] + temp[yz + 1] + 8) >> 4;
        }
    }
}
static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2,
                              int line_size, int h)
{
    int s = 0, i;

    for (i = 0; i < h; i++) {
        s    += abs(pix1[0]  - pix2[0]);
        s    += abs(pix1[1]  - pix2[1]);
        s    += abs(pix1[2]  - pix2[2]);
        s    += abs(pix1[3]  - pix2[3]);
        s    += abs(pix1[4]  - pix2[4]);
        s    += abs(pix1[5]  - pix2[5]);
        s    += abs(pix1[6]  - pix2[6]);
        s    += abs(pix1[7]  - pix2[7]);
        s    += abs(pix1[8]  - pix2[8]);
        s    += abs(pix1[9]  - pix2[9]);
        s    += abs(pix1[10] - pix2[10]);
        s    += abs(pix1[11] - pix2[11]);
        s    += abs(pix1[12] - pix2[12]);
        s    += abs(pix1[13] - pix2[13]);
        s    += abs(pix1[14] - pix2[14]);
        s    += abs(pix1[15] - pix2[15]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}
static int pix_abs16_x2_c(void *v, uint8_t *pix1, uint8_t *pix2,
                          int line_size, int h)
{
    int s = 0, i;

    for (i = 0; i < h; i++) {
        s    += abs(pix1[0]  - avg2(pix2[0],  pix2[1]));
        s    += abs(pix1[1]  - avg2(pix2[1],  pix2[2]));
        s    += abs(pix1[2]  - avg2(pix2[2],  pix2[3]));
        s    += abs(pix1[3]  - avg2(pix2[3],  pix2[4]));
        s    += abs(pix1[4]  - avg2(pix2[4],  pix2[5]));
        s    += abs(pix1[5]  - avg2(pix2[5],  pix2[6]));
        s    += abs(pix1[6]  - avg2(pix2[6],  pix2[7]));
        s    += abs(pix1[7]  - avg2(pix2[7],  pix2[8]));
        s    += abs(pix1[8]  - avg2(pix2[8],  pix2[9]));
        s    += abs(pix1[9]  - avg2(pix2[9],  pix2[10]));
        s    += abs(pix1[10] - avg2(pix2[10], pix2[11]));
        s    += abs(pix1[11] - avg2(pix2[11], pix2[12]));
        s    += abs(pix1[12] - avg2(pix2[12], pix2[13]));
        s    += abs(pix1[13] - avg2(pix2[13], pix2[14]));
        s    += abs(pix1[14] - avg2(pix2[14], pix2[15]));
        s    += abs(pix1[15] - avg2(pix2[15], pix2[16]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}
static int pix_abs16_y2_c(void *v, uint8_t *pix1, uint8_t *pix2,
                          int line_size, int h)
{
    int s = 0, i;
    uint8_t *pix3 = pix2 + line_size;

    for (i = 0; i < h; i++) {
        s    += abs(pix1[0]  - avg2(pix2[0],  pix3[0]));
        s    += abs(pix1[1]  - avg2(pix2[1],  pix3[1]));
        s    += abs(pix1[2]  - avg2(pix2[2],  pix3[2]));
        s    += abs(pix1[3]  - avg2(pix2[3],  pix3[3]));
        s    += abs(pix1[4]  - avg2(pix2[4],  pix3[4]));
        s    += abs(pix1[5]  - avg2(pix2[5],  pix3[5]));
        s    += abs(pix1[6]  - avg2(pix2[6],  pix3[6]));
        s    += abs(pix1[7]  - avg2(pix2[7],  pix3[7]));
        s    += abs(pix1[8]  - avg2(pix2[8],  pix3[8]));
        s    += abs(pix1[9]  - avg2(pix2[9],  pix3[9]));
        s    += abs(pix1[10] - avg2(pix2[10], pix3[10]));
        s    += abs(pix1[11] - avg2(pix2[11], pix3[11]));
        s    += abs(pix1[12] - avg2(pix2[12], pix3[12]));
        s    += abs(pix1[13] - avg2(pix2[13], pix3[13]));
        s    += abs(pix1[14] - avg2(pix2[14], pix3[14]));
        s    += abs(pix1[15] - avg2(pix2[15], pix3[15]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return s;
}
static int pix_abs16_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2,
                           int line_size, int h)
{
    int s = 0, i;
    uint8_t *pix3 = pix2 + line_size;

    for (i = 0; i < h; i++) {
        s    += abs(pix1[0]  - avg4(pix2[0],  pix2[1],  pix3[0],  pix3[1]));
        s    += abs(pix1[1]  - avg4(pix2[1],  pix2[2],  pix3[1],  pix3[2]));
        s    += abs(pix1[2]  - avg4(pix2[2],  pix2[3],  pix3[2],  pix3[3]));
        s    += abs(pix1[3]  - avg4(pix2[3],  pix2[4],  pix3[3],  pix3[4]));
        s    += abs(pix1[4]  - avg4(pix2[4],  pix2[5],  pix3[4],  pix3[5]));
        s    += abs(pix1[5]  - avg4(pix2[5],  pix2[6],  pix3[5],  pix3[6]));
        s    += abs(pix1[6]  - avg4(pix2[6],  pix2[7],  pix3[6],  pix3[7]));
        s    += abs(pix1[7]  - avg4(pix2[7],  pix2[8],  pix3[7],  pix3[8]));
        s    += abs(pix1[8]  - avg4(pix2[8],  pix2[9],  pix3[8],  pix3[9]));
        s    += abs(pix1[9]  - avg4(pix2[9],  pix2[10], pix3[9],  pix3[10]));
        s    += abs(pix1[10] - avg4(pix2[10], pix2[11], pix3[10], pix3[11]));
        s    += abs(pix1[11] - avg4(pix2[11], pix2[12], pix3[11], pix3[12]));
        s    += abs(pix1[12] - avg4(pix2[12], pix2[13], pix3[12], pix3[13]));
        s    += abs(pix1[13] - avg4(pix2[13], pix2[14], pix3[13], pix3[14]));
        s    += abs(pix1[14] - avg4(pix2[14], pix2[15], pix3[14], pix3[15]));
        s    += abs(pix1[15] - avg4(pix2[15], pix2[16], pix3[15], pix3[16]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return s;
}
static inline int pix_abs8_c(void *v, uint8_t *pix1, uint8_t *pix2,
                             int line_size, int h)
{
    int s = 0, i;

    for (i = 0; i < h; i++) {
        s    += abs(pix1[0] - pix2[0]);
        s    += abs(pix1[1] - pix2[1]);
        s    += abs(pix1[2] - pix2[2]);
        s    += abs(pix1[3] - pix2[3]);
        s    += abs(pix1[4] - pix2[4]);
        s    += abs(pix1[5] - pix2[5]);
        s    += abs(pix1[6] - pix2[6]);
        s    += abs(pix1[7] - pix2[7]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}
static int pix_abs8_x2_c(void *v, uint8_t *pix1, uint8_t *pix2,
                         int line_size, int h)
{
    int s = 0, i;

    for (i = 0; i < h; i++) {
        s    += abs(pix1[0] - avg2(pix2[0], pix2[1]));
        s    += abs(pix1[1] - avg2(pix2[1], pix2[2]));
        s    += abs(pix1[2] - avg2(pix2[2], pix2[3]));
        s    += abs(pix1[3] - avg2(pix2[3], pix2[4]));
        s    += abs(pix1[4] - avg2(pix2[4], pix2[5]));
        s    += abs(pix1[5] - avg2(pix2[5], pix2[6]));
        s    += abs(pix1[6] - avg2(pix2[6], pix2[7]));
        s    += abs(pix1[7] - avg2(pix2[7], pix2[8]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}
static int pix_abs8_y2_c(void *v, uint8_t *pix1, uint8_t *pix2,
                         int line_size, int h)
{
    int s = 0, i;
    uint8_t *pix3 = pix2 + line_size;

    for (i = 0; i < h; i++) {
        s    += abs(pix1[0] - avg2(pix2[0], pix3[0]));
        s    += abs(pix1[1] - avg2(pix2[1], pix3[1]));
        s    += abs(pix1[2] - avg2(pix2[2], pix3[2]));
        s    += abs(pix1[3] - avg2(pix2[3], pix3[3]));
        s    += abs(pix1[4] - avg2(pix2[4], pix3[4]));
        s    += abs(pix1[5] - avg2(pix2[5], pix3[5]));
        s    += abs(pix1[6] - avg2(pix2[6], pix3[6]));
        s    += abs(pix1[7] - avg2(pix2[7], pix3[7]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return s;
}
static int pix_abs8_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2,
                          int line_size, int h)
{
    int s = 0, i;
    uint8_t *pix3 = pix2 + line_size;

    for (i = 0; i < h; i++) {
        s    += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
        s    += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
        s    += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
        s    += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
        s    += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
        s    += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
        s    += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
        s    += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return s;
}
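
/* The pix_abs* functions above are the C reference SAD metrics for motion
 * estimation: plain (integer-pel), _x2 (horizontal half-pel), _y2 (vertical
 * half-pel) and _xy2 (both), with avg2()/avg4() synthesizing the half-pel
 * reference on the fly. */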
static int nsse16_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h)
{
    MpegEncContext *c = v;
    int score1 = 0;
    int score2 = 0;
    int x, y;

    for (y = 0; y < h; y++) {
        for (x = 0; x < 16; x++) {
            score1 += (s1[x] - s2[x]) * (s1[x] - s2[x]);
        }
        if (y + 1 < h) {
            for (x = 0; x < 15; x++) {
                score2 += FFABS(s1[x]     - s1[x + stride]
                              - s1[x + 1] + s1[x + 1 + stride])
                        - FFABS(s2[x]     - s2[x + stride]
                              - s2[x + 1] + s2[x + 1 + stride]);
            }
        }
        s1 += stride;
        s2 += stride;
    }

    if (c)
        return score1 + FFABS(score2) * c->avctx->nsse_weight;
    else
        return score1 + FFABS(score2) * 8;
}
static int nsse8_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h)
{
    MpegEncContext *c = v;
    int score1 = 0;
    int score2 = 0;
    int x, y;

    for (y = 0; y < h; y++) {
        for (x = 0; x < 8; x++) {
            score1 += (s1[x] - s2[x]) * (s1[x] - s2[x]);
        }
        if (y + 1 < h) {
            for (x = 0; x < 7; x++) {
                score2 += FFABS(s1[x]     - s1[x + stride]
                              - s1[x + 1] + s1[x + 1 + stride])
                        - FFABS(s2[x]     - s2[x + stride]
                              - s2[x + 1] + s2[x + 1 + stride]);
            }
        }
        s1 += stride;
        s2 += stride;
    }

    if (c)
        return score1 + FFABS(score2) * c->avctx->nsse_weight;
    else
        return score1 + FFABS(score2) * 8;
}
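
/* NSSE = noise-preserving SSE: score1 is the plain squared error, score2
 * the difference between the 2x2 gradient structure of source and
 * reference. Candidates that keep the texture/noise pattern are penalized
 * less; nsse_weight (or 8 by default) balances the two terms. */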
static int try_8x8basis_c(int16_t rem[64], int16_t weight[64],
                          int16_t basis[64], int scale)
{
    int i;
    unsigned int sum = 0;

    for (i = 0; i < 8 * 8; i++) {
        int b = rem[i] + ((basis[i] * scale +
                           (1 << (BASIS_SHIFT - RECON_SHIFT - 1))) >>
                          (BASIS_SHIFT - RECON_SHIFT));
        int w = weight[i];
        b >>= RECON_SHIFT;
        assert(-512 < b && b < 512);

        sum += (w * b) * (w * b) >> 4;
    }
    return sum >> 2;
}
static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale)
{
    int i;

    for (i = 0; i < 8 * 8; i++)
        rem[i] += (basis[i] * scale +
                   (1 << (BASIS_SHIFT - RECON_SHIFT - 1))) >>
                  (BASIS_SHIFT - RECON_SHIFT);
}
static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h)
{
    return 0;
}
void ff_set_cmp(DSPContext *c, me_cmp_func *cmp, int type)
{
    int i;

    memset(cmp, 0, sizeof(void *) * 6);

    for (i = 0; i < 6; i++) {
        switch (type & 0xFF) {
        case FF_CMP_SAD:
            cmp[i] = c->sad[i];
            break;
        case FF_CMP_SATD:
            cmp[i] = c->hadamard8_diff[i];
            break;
        case FF_CMP_SSE:
            cmp[i] = c->sse[i];
            break;
        case FF_CMP_DCT:
            cmp[i] = c->dct_sad[i];
            break;
        case FF_CMP_DCT264:
            cmp[i] = c->dct264_sad[i];
            break;
        case FF_CMP_DCTMAX:
            cmp[i] = c->dct_max[i];
            break;
        case FF_CMP_PSNR:
            cmp[i] = c->quant_psnr[i];
            break;
        case FF_CMP_BIT:
            cmp[i] = c->bit[i];
            break;
        case FF_CMP_RD:
            cmp[i] = c->rd[i];
            break;
        case FF_CMP_VSAD:
            cmp[i] = c->vsad[i];
            break;
        case FF_CMP_VSSE:
            cmp[i] = c->vsse[i];
            break;
        case FF_CMP_ZERO:
            cmp[i] = zero_cmp;
            break;
        case FF_CMP_NSSE:
            cmp[i] = c->nsse[i];
            break;
        default:
            av_log(NULL, AV_LOG_ERROR,
                   "internal error in cmp function selection\n");
        }
    }
}
static void add_bytes_c(uint8_t *dst, uint8_t *src, int w)
{
    long i;

    for (i = 0; i <= w - (long) sizeof(long); i += sizeof(long)) {
        long a = *(long *) (src + i);
        long b = *(long *) (dst + i);
        *(long *) (dst + i) = ((a & pb_7f) + (b & pb_7f)) ^ ((a ^ b) & pb_80);
    }
    for (; i < w; i++)
        dst[i + 0] += src[i + 0];
}
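
/* SWAR byte addition: the low 7 bits of every byte lane are summed in one
 * native add ((a & pb_7f) + (b & pb_7f)); masking first keeps carries from
 * crossing lane boundaries. The partial sum's bit 7 already holds the carry
 * out of the low bits, so XORing in ((a ^ b) & pb_80) adds the two top bits
 * modulo 2, discarding inter-byte carries. */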
static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w)
{
    long i;

#if !HAVE_FAST_UNALIGNED
    if ((long) src2 & (sizeof(long) - 1)) {
        for (i = 0; i + 7 < w; i += 8) {
            dst[i + 0] = src1[i + 0] - src2[i + 0];
            dst[i + 1] = src1[i + 1] - src2[i + 1];
            dst[i + 2] = src1[i + 2] - src2[i + 2];
            dst[i + 3] = src1[i + 3] - src2[i + 3];
            dst[i + 4] = src1[i + 4] - src2[i + 4];
            dst[i + 5] = src1[i + 5] - src2[i + 5];
            dst[i + 6] = src1[i + 6] - src2[i + 6];
            dst[i + 7] = src1[i + 7] - src2[i + 7];
        }
    } else
#endif
    for (i = 0; i <= w - (long) sizeof(long); i += sizeof(long)) {
        long a = *(long *) (src1 + i);
        long b = *(long *) (src2 + i);
        *(long *) (dst + i) = ((a | pb_80) - (b & pb_7f)) ^
                              ((a ^ b ^ pb_80) & pb_80);
    }
    for (; i < w; i++)
        dst[i + 0] = src1[i + 0] - src2[i + 0];
}
static void add_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1,
                                         const uint8_t *diff, int w,
                                         int *left, int *left_top)
{
    int i;
    uint8_t l, lt;

    l  = *left;
    lt = *left_top;

    for (i = 0; i < w; i++) {
        l      = mid_pred(l, src1[i], (l + src1[i] - lt) & 0xFF) + diff[i];
        lt     = src1[i];
        dst[i] = l;
    }

    *left     = l;
    *left_top = lt;
}
static void sub_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1,
                                         const uint8_t *src2, int w,
                                         int *left, int *left_top)
{
    int i;
    uint8_t l, lt;

    l  = *left;
    lt = *left_top;

    for (i = 0; i < w; i++) {
        const int pred = mid_pred(l, src1[i], (l + src1[i] - lt) & 0xFF);
        lt     = src1[i];
        l      = src2[i];
        dst[i] = l - pred;
    }

    *left     = l;
    *left_top = lt;
}
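
/* mid_pred(l, t, l + t - lt) is the median (MED / LOCO-I) predictor used by
 * HuffYUV: the median of the left and top neighbours and their gradient
 * estimate l + t - lt. add_ reconstructs pixels from residuals, sub_
 * produces the residuals. */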
static int add_hfyu_left_prediction_c(uint8_t *dst, const uint8_t *src,
                                      int w, int acc)
{
    int i;

    for (i = 0; i < w - 1; i++) {
        acc   += src[i];
        dst[i] = acc;
        i++;
        acc   += src[i];
        dst[i] = acc;
    }

    for (; i < w; i++) {
        acc   += src[i];
        dst[i] = acc;
    }

    return acc;
}
#if HAVE_BIGENDIAN
#define B 3
#define G 2
#define R 1
#define A 0
#else
#define B 0
#define G 1
#define R 2
#define A 3
#endif

static void add_hfyu_left_prediction_bgr32_c(uint8_t *dst, const uint8_t *src,
                                             int w, int *red, int *green,
                                             int *blue, int *alpha)
{
    int i, r, g, b, a;

    r = *red;
    g = *green;
    b = *blue;
    a = *alpha;

    for (i = 0; i < w; i++) {
        b += src[4 * i + B];
        g += src[4 * i + G];
        r += src[4 * i + R];
        a += src[4 * i + A];

        dst[4 * i + B] = b;
        dst[4 * i + G] = g;
        dst[4 * i + R] = r;
        dst[4 * i + A] = a;
    }

    *red   = r;
    *green = g;
    *blue  = b;
    *alpha = a;
}

#undef B
#undef G
#undef R
#undef A
#define BUTTERFLY2(o1, o2, i1, i2) \
    o1 = (i1) + (i2);              \
    o2 = (i1) - (i2);

#define BUTTERFLY1(x, y) \
    {                    \
        int a, b;        \
        a = x;           \
        b = y;           \
        x = a + b;       \
        y = a - b;       \
    }

#define BUTTERFLYA(x, y) (FFABS((x) + (y)) + FFABS((x) - (y)))
static int hadamard8_diff8x8_c(/* MpegEncContext */ void *s, uint8_t *dst,
                               uint8_t *src, int stride, int h)
{
    int i, temp[64], sum = 0;

    assert(h == 8);

    for (i = 0; i < 8; i++) {
        // FIXME: try pointer walks
        BUTTERFLY2(temp[8 * i + 0], temp[8 * i + 1],
                   src[stride * i + 0] - dst[stride * i + 0],
                   src[stride * i + 1] - dst[stride * i + 1]);
        BUTTERFLY2(temp[8 * i + 2], temp[8 * i + 3],
                   src[stride * i + 2] - dst[stride * i + 2],
                   src[stride * i + 3] - dst[stride * i + 3]);
        BUTTERFLY2(temp[8 * i + 4], temp[8 * i + 5],
                   src[stride * i + 4] - dst[stride * i + 4],
                   src[stride * i + 5] - dst[stride * i + 5]);
        BUTTERFLY2(temp[8 * i + 6], temp[8 * i + 7],
                   src[stride * i + 6] - dst[stride * i + 6],
                   src[stride * i + 7] - dst[stride * i + 7]);

        BUTTERFLY1(temp[8 * i + 0], temp[8 * i + 2]);
        BUTTERFLY1(temp[8 * i + 1], temp[8 * i + 3]);
        BUTTERFLY1(temp[8 * i + 4], temp[8 * i + 6]);
        BUTTERFLY1(temp[8 * i + 5], temp[8 * i + 7]);

        BUTTERFLY1(temp[8 * i + 0], temp[8 * i + 4]);
        BUTTERFLY1(temp[8 * i + 1], temp[8 * i + 5]);
        BUTTERFLY1(temp[8 * i + 2], temp[8 * i + 6]);
        BUTTERFLY1(temp[8 * i + 3], temp[8 * i + 7]);
    }

    for (i = 0; i < 8; i++) {
        BUTTERFLY1(temp[8 * 0 + i], temp[8 * 1 + i]);
        BUTTERFLY1(temp[8 * 2 + i], temp[8 * 3 + i]);
        BUTTERFLY1(temp[8 * 4 + i], temp[8 * 5 + i]);
        BUTTERFLY1(temp[8 * 6 + i], temp[8 * 7 + i]);

        BUTTERFLY1(temp[8 * 0 + i], temp[8 * 2 + i]);
        BUTTERFLY1(temp[8 * 1 + i], temp[8 * 3 + i]);
        BUTTERFLY1(temp[8 * 4 + i], temp[8 * 6 + i]);
        BUTTERFLY1(temp[8 * 5 + i], temp[8 * 7 + i]);

        sum += BUTTERFLYA(temp[8 * 0 + i], temp[8 * 4 + i])
             + BUTTERFLYA(temp[8 * 1 + i], temp[8 * 5 + i])
             + BUTTERFLYA(temp[8 * 2 + i], temp[8 * 6 + i])
             + BUTTERFLYA(temp[8 * 3 + i], temp[8 * 7 + i]);
    }
    return sum;
}
static int hadamard8_intra8x8_c(/*MpegEncContext*/ void *s, uint8_t *src, uint8_t *dummy, int stride, int h){
    int i;
    int temp[64];
    int sum=0;

    assert(h==8);

    for(i=0; i<8; i++){
        //FIXME try pointer walks
        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0],src[stride*i+1]);
        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2],src[stride*i+3]);
        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4],src[stride*i+5]);
        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6],src[stride*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
        BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+5], temp[8*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
        BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
    }

    for(i=0; i<8; i++){
        BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
        BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
        BUTTERFLY1(temp[8*6+i], temp[8*7+i]);

        BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
        BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
        BUTTERFLY1(temp[8*5+i], temp[8*7+i]);

        sum +=
             BUTTERFLYA(temp[8*0+i], temp[8*4+i])
            +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
            +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
            +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
    }

    sum -= FFABS(temp[8*0] + temp[8*4]); // -mean

    return sum;
}
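/* DCT-based SAD: forward-transform the difference block and sum the absolute
 * values of the DCT coefficients, which weights errors roughly the way the
 * transform coder will see them. */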
static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    LOCAL_ALIGNED_16(int16_t, temp, [64]);

    assert(h==8);

    s->dsp.diff_pixels(temp, src1, src2, stride);
    s->dsp.fdct(temp);
    return s->dsp.sum_abs_dctelem(temp);
}
#if CONFIG_GPL
#define DCT8_1D {\
    const int s07 = SRC(0) + SRC(7);\
    const int s16 = SRC(1) + SRC(6);\
    const int s25 = SRC(2) + SRC(5);\
    const int s34 = SRC(3) + SRC(4);\
    const int a0 = s07 + s34;\
    const int a1 = s16 + s25;\
    const int a2 = s07 - s34;\
    const int a3 = s16 - s25;\
    const int d07 = SRC(0) - SRC(7);\
    const int d16 = SRC(1) - SRC(6);\
    const int d25 = SRC(2) - SRC(5);\
    const int d34 = SRC(3) - SRC(4);\
    const int a4 = d16 + d25 + (d07 + (d07>>1));\
    const int a5 = d07 - d34 - (d25 + (d25>>1));\
    const int a6 = d07 + d34 - (d16 + (d16>>1));\
    const int a7 = d16 - d25 + (d34 + (d34>>1));\
    DST(0,  a0 + a1     ) ;\
    DST(1,  a4 + (a7>>2)) ;\
    DST(2,  a2 + (a3>>1)) ;\
    DST(3,  a5 + (a6>>2)) ;\
    DST(4,  a0 - a1     ) ;\
    DST(5,  a6 - (a5>>2)) ;\
    DST(6, (a2>>1) - a3 ) ;\
    DST(7, (a4>>2) - a7 ) ;\
}
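/* dct264_sad applies DCT8_1D first to the rows (SRC/DST indexing dct[i][x])
 * and then to the columns (dct[x][i]); in the second pass DST() directly
 * accumulates the absolute transformed values instead of storing them. */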
static int dct264_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    int16_t dct[8][8];
    int i;
    int sum=0;

    s->dsp.diff_pixels(dct[0], src1, src2, stride);

#define SRC(x) dct[i][x]
#define DST(x,v) dct[i][x]= v
    for( i = 0; i < 8; i++ )
        DCT8_1D
#undef SRC
#undef DST

#define SRC(x) dct[x][i]
#define DST(x,v) sum += FFABS(v)
    for( i = 0; i < 8; i++ )
        DCT8_1D
#undef SRC
#undef DST
    return sum;
}
#endif
static int dct_max8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    LOCAL_ALIGNED_16(int16_t, temp, [64]);
    int sum=0, i;

    assert(h==8);

    s->dsp.diff_pixels(temp, src1, src2, stride);
    s->dsp.fdct(temp);

    for(i=0; i<64; i++)
        sum= FFMAX(sum, FFABS(temp[i]));

    return sum;
}
static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    LOCAL_ALIGNED_16(int16_t, temp, [64*2]);
    int16_t * const bak = temp+64;
    int sum=0, i;

    assert(h==8);

    s->mb_intra=0;

    s->dsp.diff_pixels(temp, src1, src2, stride);

    memcpy(bak, temp, 64*sizeof(int16_t));

    s->block_last_index[0/*FIXME*/]= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
    s->dct_unquantize_inter(s, temp, 0, s->qscale);
    ff_simple_idct_8(temp); //FIXME

    for(i=0; i<64; i++)
        sum+= (temp[i]-bak[i])*(temp[i]-bak[i]);

    return sum;
}
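/* Rate-distortion metric: quantize the difference block, count the bits the
 * entropy coder would spend on it (run/level VLC lengths), reconstruct, and
 * return SSE distortion plus a qscale-weighted bit cost. */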
static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    const uint8_t *scantable= s->intra_scantable.permutated;
    LOCAL_ALIGNED_16(int16_t, temp, [64]);
    LOCAL_ALIGNED_16(uint8_t, lsrc1, [64]);
    LOCAL_ALIGNED_16(uint8_t, lsrc2, [64]);
    int i, last, run, bits, level, distortion, start_i;
    const int esc_length= s->ac_esc_length;
    uint8_t * length;
    uint8_t * last_length;

    assert(h==8);

    copy_block8(lsrc1, src1, 8, stride, 8);
    copy_block8(lsrc2, src2, 8, stride, 8);

    s->dsp.diff_pixels(temp, lsrc1, lsrc2, 8);

    s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);

    bits=0;

    if (s->mb_intra) {
        start_i = 1;
        length     = s->intra_ac_vlc_length;
        last_length= s->intra_ac_vlc_last_length;
        bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
    } else {
        start_i = 0;
        length     = s->inter_ac_vlc_length;
        last_length= s->inter_ac_vlc_last_length;
    }

    if(last>=start_i){
        run=0;
        for(i=start_i; i<last; i++){
            int j= scantable[i];
            level= temp[j];

            if(level){
                level+=64;
                if((level&(~127)) == 0){
                    bits+= length[UNI_AC_ENC_INDEX(run, level)];
                }else
                    bits+= esc_length;
                run=0;
            }else
                run++;
        }
        i= scantable[last];

        level= temp[i] + 64;

        assert(level - 64);

        if((level&(~127)) == 0){
            bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
        }else
            bits+= esc_length;
    }

    if(last>=0){
        if(s->mb_intra)
            s->dct_unquantize_intra(s, temp, 0, s->qscale);
        else
            s->dct_unquantize_inter(s, temp, 0, s->qscale);
    }

    s->dsp.idct_add(lsrc2, 8, temp);

    distortion= s->dsp.sse[1](NULL, lsrc2, lsrc1, 8, 8);

    return distortion + ((bits*s->qscale*s->qscale*109 + 64)>>7);
}
static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    const uint8_t *scantable= s->intra_scantable.permutated;
    LOCAL_ALIGNED_16(int16_t, temp, [64]);
    int i, last, run, bits, level, start_i;
    const int esc_length= s->ac_esc_length;
    uint8_t * length;
    uint8_t * last_length;

    assert(h==8);

    s->dsp.diff_pixels(temp, src1, src2, stride);

    s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);

    bits=0;

    if (s->mb_intra) {
        start_i = 1;
        length     = s->intra_ac_vlc_length;
        last_length= s->intra_ac_vlc_last_length;
        bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
    } else {
        start_i = 0;
        length     = s->inter_ac_vlc_length;
        last_length= s->inter_ac_vlc_last_length;
    }

    if(last>=start_i){
        run=0;
        for(i=start_i; i<last; i++){
            int j= scantable[i];
            level= temp[j];

            if(level){
                level+=64;
                if((level&(~127)) == 0){
                    bits+= length[UNI_AC_ENC_INDEX(run, level)];
                }else
                    bits+= esc_length;
                run=0;
            }else
                run++;
        }
        i= scantable[last];

        level= temp[i] + 64;

        assert(level - 64);

        if((level&(~127)) == 0){
            bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
        }else
            bits+= esc_length;
    }

    return bits;
}
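/* Vertical-gradient metrics: vsad/vsse sum absolute (or squared) differences
 * between vertically adjacent lines, so they react strongly to vertical
 * high-frequency content such as interlacing. */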
#define VSAD_INTRA(size) \
static int vsad_intra##size##_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
    int score=0; \
    int x,y; \
    \
    for(y=1; y<h; y++){ \
        for(x=0; x<size; x+=4){ \
            score+= FFABS(s[x  ] - s[x  +stride]) + FFABS(s[x+1] - s[x+1+stride]) \
                   +FFABS(s[x+2] - s[x+2+stride]) + FFABS(s[x+3] - s[x+3+stride]); \
        } \
        s+= stride; \
    } \
    \
    return score; \
}
VSAD_INTRA(8)
VSAD_INTRA(16)
static int vsad16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
    int score=0;
    int x,y;

    for(y=1; y<h; y++){
        for(x=0; x<16; x++){
            score+= FFABS(s1[x] - s2[x] - s1[x+stride] + s2[x+stride]);
        }
        s1+= stride;
        s2+= stride;
    }

    return score;
}
#define SQ(a) ((a)*(a))
#define VSSE_INTRA(size) \
static int vsse_intra##size##_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
    int score=0; \
    int x,y; \
    \
    for(y=1; y<h; y++){ \
        for(x=0; x<size; x+=4){ \
            score+= SQ(s[x  ] - s[x  +stride]) + SQ(s[x+1] - s[x+1+stride]) \
                   +SQ(s[x+2] - s[x+2+stride]) + SQ(s[x+3] - s[x+3+stride]); \
        } \
        s+= stride; \
    } \
    \
    return score; \
}
VSSE_INTRA(8)
VSSE_INTRA(16)
static int vsse16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
    int score=0;
    int x,y;

    for(y=1; y<h; y++){
        for(x=0; x<16; x++){
            score+= SQ(s1[x] - s2[x] - s1[x+stride] + s2[x+stride]);
        }
        s1+= stride;
        s2+= stride;
    }

    return score;
}
static int ssd_int8_vs_int16_c(const int8_t *pix1, const int16_t *pix2,
                               int size){
    int score=0;
    int i;

    for(i=0; i<size; i++)
        score += (pix1[i]-pix2[i])*(pix1[i]-pix2[i]);

    return score;
}
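/* WRAPPER8_16_SQ builds a 16-pixel-wide comparison function out of an 8x8
 * one by evaluating the 8x8 quadrants: the top two always, the bottom two
 * only when h==16. */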
#define WRAPPER8_16_SQ(name8, name16)\
static int name16(void /*MpegEncContext*/ *s, uint8_t *dst, uint8_t *src, int stride, int h){\
    int score=0;\
    score +=name8(s, dst           , src           , stride, 8);\
    score +=name8(s, dst+8         , src+8         , stride, 8);\
    if(h==16){\
        dst += 8*stride;\
        src += 8*stride;\
        score +=name8(s, dst           , src           , stride, 8);\
        score +=name8(s, dst+8         , src+8         , stride, 8);\
    }\
    return score;\
}
WRAPPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c)
WRAPPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c)
WRAPPER8_16_SQ(dct_sad8x8_c, dct_sad16_c)
#if CONFIG_GPL
WRAPPER8_16_SQ(dct264_sad8x8_c, dct264_sad16_c)
#endif
WRAPPER8_16_SQ(dct_max8x8_c, dct_max16_c)
WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
WRAPPER8_16_SQ(rd8x8_c, rd16_c)
WRAPPER8_16_SQ(bit8x8_c, bit16_c)
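/* Float clipping via integer compares: IEEE-754 floats of equal sign order
 * like sign-magnitude integers, so when min < 0 < max the clip can in effect
 * be done on the raw bit patterns (mini is the negative bound, maxisign the
 * positive bound with its sign bit flipped for an unsigned compare). */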
static inline uint32_t clipf_c_one(uint32_t a, uint32_t mini,
                                   uint32_t maxi, uint32_t maxisign)
{
    if(a > mini) return mini;
    else if((a^(1U<<31)) > maxisign) return maxi;
    else return a;
}
static void vector_clipf_c_opposite_sign(float *dst, const float *src, float *min, float *max, int len){
    int i;
    uint32_t mini = *(uint32_t*)min;
    uint32_t maxi = *(uint32_t*)max;
    uint32_t maxisign = maxi ^ (1U<<31);
    uint32_t *dsti = (uint32_t*)dst;
    const uint32_t *srci = (const uint32_t*)src;
    for(i=0; i<len; i+=8) {
        dsti[i + 0] = clipf_c_one(srci[i + 0], mini, maxi, maxisign);
        dsti[i + 1] = clipf_c_one(srci[i + 1], mini, maxi, maxisign);
        dsti[i + 2] = clipf_c_one(srci[i + 2], mini, maxi, maxisign);
        dsti[i + 3] = clipf_c_one(srci[i + 3], mini, maxi, maxisign);
        dsti[i + 4] = clipf_c_one(srci[i + 4], mini, maxi, maxisign);
        dsti[i + 5] = clipf_c_one(srci[i + 5], mini, maxi, maxisign);
        dsti[i + 6] = clipf_c_one(srci[i + 6], mini, maxi, maxisign);
        dsti[i + 7] = clipf_c_one(srci[i + 7], mini, maxi, maxisign);
    }
}
static void vector_clipf_c(float *dst, const float *src, float min, float max, int len){
    int i;
    if(min < 0 && max > 0) {
        vector_clipf_c_opposite_sign(dst, src, &min, &max, len);
    } else {
        for(i=0; i < len; i+=8) {
            dst[i    ] = av_clipf(src[i    ], min, max);
            dst[i + 1] = av_clipf(src[i + 1], min, max);
            dst[i + 2] = av_clipf(src[i + 2], min, max);
            dst[i + 3] = av_clipf(src[i + 3], min, max);
            dst[i + 4] = av_clipf(src[i + 4], min, max);
            dst[i + 5] = av_clipf(src[i + 5], min, max);
            dst[i + 6] = av_clipf(src[i + 6], min, max);
            dst[i + 7] = av_clipf(src[i + 7], min, max);
        }
    }
}
static int32_t scalarproduct_int16_c(const int16_t * v1, const int16_t * v2, int order)
{
    int res = 0;

    while (order--)
        res += *v1++ * *v2++;

    return res;
}
static int32_t scalarproduct_and_madd_int16_c(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul)
{
    int res = 0;

    while (order--) {
        res   += *v1 * *v2++;
        *v1++ += mul * *v3++;
    }
    return res;
}
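/* Apply a symmetric window in Q15 fixed point: each iteration windows one
 * sample from the front and its mirror from the back with the same
 * coefficient, adding (1 << 14) for round-to-nearest before the >> 15. */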
static void apply_window_int16_c(int16_t *output, const int16_t *input,
                                 const int16_t *window, unsigned int len)
{
    int i;
    int len2 = len >> 1;

    for (i = 0; i < len2; i++) {
        int16_t w       = window[i];
        output[i]       = (MUL16(input[i],       w) + (1 << 14)) >> 15;
        output[len-i-1] = (MUL16(input[len-i-1], w) + (1 << 14)) >> 15;
    }
}
static void vector_clip_int32_c(int32_t *dst, const int32_t *src, int32_t min,
                                int32_t max, unsigned int len)
{
    do {
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        len -= 8;
    } while (len > 0);
}
static void ff_jref_idct_put(uint8_t *dest, int line_size, int16_t *block)
{
    ff_j_rev_dct (block);
    put_pixels_clamped_c(block, dest, line_size);
}
static void ff_jref_idct_add(uint8_t *dest, int line_size, int16_t *block)
{
    ff_j_rev_dct (block);
    add_pixels_clamped_c(block, dest, line_size);
}
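/* ff_cropTbl clamps indices in [-MAX_NEG_CROP, 255 + MAX_NEG_CROP] to the
 * byte range: the middle 256 entries are the identity mapping, the outer
 * regions saturate to 0 and 255. ff_squareTbl gives (i - 256)^2 so squared
 * errors can be looked up instead of multiplied. */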
/* init static data */
av_cold void ff_dsputil_static_init(void)
{
    int i;

    for(i=0;i<256;i++) ff_cropTbl[i + MAX_NEG_CROP] = i;
    for(i=0;i<MAX_NEG_CROP;i++) {
        ff_cropTbl[i] = 0;
        ff_cropTbl[i + MAX_NEG_CROP + 256] = 255;
    }

    for(i=0;i<512;i++) {
        ff_squareTbl[i] = (i - 256) * (i - 256);
    }

    for(i=0; i<64; i++) ff_inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1;
}
int ff_check_alignment(void){
    static int did_fail=0;
    LOCAL_ALIGNED_16(int, aligned, [4]);

    if((intptr_t)aligned & 15){
        if(!did_fail){
#if HAVE_MMX || HAVE_ALTIVEC
            av_log(NULL, AV_LOG_ERROR,
                "Compiler did not align stack variables. Libavcodec has been miscompiled\n"
                "and may be very slow or crash. This is not a bug in libavcodec,\n"
                "but in the compiler. You may try recompiling using gcc >= 4.2.\n"
                "Do not report crashes to Libav developers.\n");
#endif
            did_fail=1;
        }
        return -1;
    }
    return 0;
}
av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx)
{
    ff_check_alignment();

#if CONFIG_ENCODERS
    if (avctx->bits_per_raw_sample == 10) {
        c->fdct    = ff_jpeg_fdct_islow_10;
        c->fdct248 = ff_fdct248_islow_10;
    } else {
        if(avctx->dct_algo==FF_DCT_FASTINT) {
            c->fdct    = ff_fdct_ifast;
            c->fdct248 = ff_fdct_ifast248;
        }
        else if(avctx->dct_algo==FF_DCT_FAAN) {
            c->fdct    = ff_faandct;
            c->fdct248 = ff_faandct248;
        }
        else {
            c->fdct    = ff_jpeg_fdct_islow_8; //slow/accurate/default
            c->fdct248 = ff_fdct248_islow_8;
        }
    }
#endif //CONFIG_ENCODERS
    if (avctx->bits_per_raw_sample == 10) {
        c->idct_put              = ff_simple_idct_put_10;
        c->idct_add              = ff_simple_idct_add_10;
        c->idct                  = ff_simple_idct_10;
        c->idct_permutation_type = FF_NO_IDCT_PERM;
    } else {
        if(avctx->idct_algo==FF_IDCT_INT){
            c->idct_put= ff_jref_idct_put;
            c->idct_add= ff_jref_idct_add;
            c->idct    = ff_j_rev_dct;
            c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
        }else if(avctx->idct_algo==FF_IDCT_FAAN){
            c->idct_put= ff_faanidct_put;
            c->idct_add= ff_faanidct_add;
            c->idct    = ff_faanidct;
            c->idct_permutation_type= FF_NO_IDCT_PERM;
        }else{ //accurate/default
            c->idct_put = ff_simple_idct_put_8;
            c->idct_add = ff_simple_idct_add_8;
            c->idct     = ff_simple_idct_8;
            c->idct_permutation_type= FF_NO_IDCT_PERM;
        }
    }
    c->diff_pixels = diff_pixels_c;
    c->put_pixels_clamped = put_pixels_clamped_c;
    c->put_signed_pixels_clamped = put_signed_pixels_clamped_c;
    c->add_pixels_clamped = add_pixels_clamped_c;
    c->sum_abs_dctelem = sum_abs_dctelem_c;

    c->pix_sum = pix_sum_c;
    c->pix_norm1 = pix_norm1_c;

    c->fill_block_tab[0] = fill_block16_c;
    c->fill_block_tab[1] = fill_block8_c;
    /* TODO [0] 16  [1] 8 */
    c->pix_abs[0][0] = pix_abs16_c;
    c->pix_abs[0][1] = pix_abs16_x2_c;
    c->pix_abs[0][2] = pix_abs16_y2_c;
    c->pix_abs[0][3] = pix_abs16_xy2_c;
    c->pix_abs[1][0] = pix_abs8_c;
    c->pix_abs[1][1] = pix_abs8_x2_c;
    c->pix_abs[1][2] = pix_abs8_y2_c;
    c->pix_abs[1][3] = pix_abs8_xy2_c;
    c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c;
    c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c;
    c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c;
    c->put_tpel_pixels_tab[ 4] = put_tpel_pixels_mc01_c;
    c->put_tpel_pixels_tab[ 5] = put_tpel_pixels_mc11_c;
    c->put_tpel_pixels_tab[ 6] = put_tpel_pixels_mc21_c;
    c->put_tpel_pixels_tab[ 8] = put_tpel_pixels_mc02_c;
    c->put_tpel_pixels_tab[ 9] = put_tpel_pixels_mc12_c;
    c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c;

    c->avg_tpel_pixels_tab[ 0] = avg_tpel_pixels_mc00_c;
    c->avg_tpel_pixels_tab[ 1] = avg_tpel_pixels_mc10_c;
    c->avg_tpel_pixels_tab[ 2] = avg_tpel_pixels_mc20_c;
    c->avg_tpel_pixels_tab[ 4] = avg_tpel_pixels_mc01_c;
    c->avg_tpel_pixels_tab[ 5] = avg_tpel_pixels_mc11_c;
    c->avg_tpel_pixels_tab[ 6] = avg_tpel_pixels_mc21_c;
    c->avg_tpel_pixels_tab[ 8] = avg_tpel_pixels_mc02_c;
    c->avg_tpel_pixels_tab[ 9] = avg_tpel_pixels_mc12_c;
    c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c;
#define dspfunc(PFX, IDX, NUM) \
    c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \
    c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \
    c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \
    c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \
    c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \
    c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \
    c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \
    c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \
    c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \
    c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \
    c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
    c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
    c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
    c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
    c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
    c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c

    dspfunc(put_qpel, 0, 16);
    dspfunc(put_no_rnd_qpel, 0, 16);

    dspfunc(avg_qpel, 0, 16);
    /* dspfunc(avg_no_rnd_qpel, 0, 16); */

    dspfunc(put_qpel, 1, 8);
    dspfunc(put_no_rnd_qpel, 1, 8);

    dspfunc(avg_qpel, 1, 8);
    /* dspfunc(avg_no_rnd_qpel, 1, 8); */

#undef dspfunc
    c->put_mspel_pixels_tab[0]= ff_put_pixels8x8_c;
    c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c;
    c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c;
    c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c;
    c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c;
    c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c;
    c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c;
    c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c;
#define SET_CMP_FUNC(name) \
    c->name[0]= name ## 16_c;\
    c->name[1]= name ## 8x8_c;

    SET_CMP_FUNC(hadamard8_diff)
    c->hadamard8_diff[4]= hadamard8_intra16_c;
    c->hadamard8_diff[5]= hadamard8_intra8x8_c;
    SET_CMP_FUNC(dct_sad)
    SET_CMP_FUNC(dct_max)
#if CONFIG_GPL
    SET_CMP_FUNC(dct264_sad)
#endif
    c->sad[0]= pix_abs16_c;
    c->sad[1]= pix_abs8_c;
    c->sse[0]= sse16_c;
    c->sse[1]= sse8_c;
    c->sse[2]= sse4_c;
    SET_CMP_FUNC(quant_psnr)
    SET_CMP_FUNC(rd)
    SET_CMP_FUNC(bit)
    c->vsad[0]= vsad16_c;
    c->vsad[4]= vsad_intra16_c;
    c->vsad[5]= vsad_intra8_c;
    c->vsse[0]= vsse16_c;
    c->vsse[4]= vsse_intra16_c;
    c->vsse[5]= vsse_intra8_c;
    c->nsse[0]= nsse16_c;
    c->nsse[1]= nsse8_c;

    c->ssd_int8_vs_int16 = ssd_int8_vs_int16_c;

    c->add_bytes= add_bytes_c;
    c->diff_bytes= diff_bytes_c;
    c->add_hfyu_median_prediction= add_hfyu_median_prediction_c;
    c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c;
    c->add_hfyu_left_prediction  = add_hfyu_left_prediction_c;
    c->add_hfyu_left_prediction_bgr32 = add_hfyu_left_prediction_bgr32_c;
    c->bswap_buf= bswap_buf;
    c->bswap16_buf = bswap16_buf;
    if (CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
        c->h263_h_loop_filter= h263_h_loop_filter_c;
        c->h263_v_loop_filter= h263_v_loop_filter_c;
    }

    c->h261_loop_filter= h261_loop_filter_c;

    c->try_8x8basis= try_8x8basis_c;
    c->add_8x8basis= add_8x8basis_c;
    c->vector_clipf = vector_clipf_c;
    c->scalarproduct_int16 = scalarproduct_int16_c;
    c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_c;
    c->apply_window_int16 = apply_window_int16_c;
    c->vector_clip_int32 = vector_clip_int32_c;

    c->shrink[0]= av_image_copy_plane;
    c->shrink[1]= ff_shrink22;
    c->shrink[2]= ff_shrink44;
    c->shrink[3]= ff_shrink88;

    c->add_pixels8 = add_pixels8_c;
#define hpel_funcs(prefix, idx, num) \
    c->prefix ## _pixels_tab idx [0] = prefix ## _pixels ## num ## _8_c; \
    c->prefix ## _pixels_tab idx [1] = prefix ## _pixels ## num ## _x2_8_c; \
    c->prefix ## _pixels_tab idx [2] = prefix ## _pixels ## num ## _y2_8_c; \
    c->prefix ## _pixels_tab idx [3] = prefix ## _pixels ## num ## _xy2_8_c

    hpel_funcs(put, [0], 16);
    hpel_funcs(put, [1],  8);
    hpel_funcs(put, [2],  4);
    hpel_funcs(put, [3],  2);
    hpel_funcs(put_no_rnd, [0], 16);
    hpel_funcs(put_no_rnd, [1],  8);
    hpel_funcs(avg, [0], 16);
    hpel_funcs(avg, [1],  8);
    hpel_funcs(avg, [2],  4);
    hpel_funcs(avg, [3],  2);
    hpel_funcs(avg_no_rnd,, 16);
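/* Pixel-access helpers come in 16- and 32-bit DCT-buffer flavours per bit
 * depth; BIT_DEPTH_FUNCS picks the get_pixels/draw_edges/clear_block(s)
 * implementations matching the codec's bits_per_raw_sample. */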
#define FUNC(f, depth) f ## _ ## depth
#define FUNCC(f, depth) f ## _ ## depth ## _c

#define BIT_DEPTH_FUNCS(depth, dct)\
    c->get_pixels    = FUNCC(get_pixels   ## dct , depth);\
    c->draw_edges    = FUNCC(draw_edges          , depth);\
    c->clear_block   = FUNCC(clear_block  ## dct , depth);\
    c->clear_blocks  = FUNCC(clear_blocks ## dct , depth);

    switch (avctx->bits_per_raw_sample) {
    case 9:
        if (c->dct_bits == 32) {
            BIT_DEPTH_FUNCS(9, _32);
        } else {
            BIT_DEPTH_FUNCS(9, _16);
        }
        break;
    case 10:
        if (c->dct_bits == 32) {
            BIT_DEPTH_FUNCS(10, _32);
        } else {
            BIT_DEPTH_FUNCS(10, _16);
        }
        break;
    default:
        BIT_DEPTH_FUNCS(8, _16);
        break;
    }
    if (HAVE_MMX)        ff_dsputil_init_mmx   (c, avctx);
    if (ARCH_ARM)        ff_dsputil_init_arm   (c, avctx);
    if (HAVE_VIS)        ff_dsputil_init_vis   (c, avctx);
    if (ARCH_ALPHA)      ff_dsputil_init_alpha (c, avctx);
    if (ARCH_PPC)        ff_dsputil_init_ppc   (c, avctx);
    if (ARCH_SH4)        ff_dsputil_init_sh4   (c, avctx);
    if (ARCH_BFIN)       ff_dsputil_init_bfin  (c, avctx);
    ff_init_scantable_permutation(c->idct_permutation,
                                  c->idct_permutation_type);
}