3 * Copyright (c) 2000, 2001 Fabrice Bellard.
4 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
30 #include "mpegvideo.h"
31 #include "simple_idct.h"
/* Forward declaration: in-place spatial discrete wavelet transform over a
 * width x height region of `buffer` with the given row stride; `type`
 * selects the wavelet and `decomposition_count` the number of levels.
 * NOTE(review): semantics inferred from the w_c() caller below, which is
 * guarded by CONFIG_SNOW_ENCODER ("idwt is in snow.c") — confirm against
 * the implementation in snow.c. */
35 void ff_spatial_dwt(int *buffer
, int width
, int height
, int stride
, int type
, int decomposition_count
);
/* Clipping lookup table: code below indexes it as cropTbl + MAX_NEG_CROP,
 * mapping a possibly out-of-range signed value into 0..255.
 * Zero-initialized here; filled at runtime. */
37 uint8_t cropTbl
[256 + 2 * MAX_NEG_CROP
] = {0, };
/* Square lookup table: code below indexes it as squareTbl + 256, mapping a
 * signed difference in -256..255 to its square (used by pix_norm1_c and
 * the sse*_c functions). Zero-initialized here; filled at runtime. */
38 uint32_t squareTbl
[512] = {0, };
/* Standard zigzag scan order for an 8x8 coefficient block: entry k is the
 * row-major index of the k-th coefficient in scan order. */
40 const uint8_t ff_zigzag_direct
[64] = {
41 0, 1, 8, 16, 9, 2, 3, 10,
42 17, 24, 32, 25, 18, 11, 4, 5,
43 12, 19, 26, 33, 40, 48, 41, 34,
44 27, 20, 13, 6, 7, 14, 21, 28,
45 35, 42, 49, 56, 57, 50, 43, 36,
46 29, 22, 15, 23, 30, 37, 44, 51,
47 58, 59, 52, 45, 38, 31, 39, 46,
48 53, 60, 61, 54, 47, 55, 62, 63
51 /* Specific zigzag scan for 248 idct. NOTE that unlike the
52 specification, we interleave the fields */
/* (as with ff_zigzag_direct, entry k is the row-major index of the k-th
 * coefficient in scan order) */
53 const uint8_t ff_zigzag248_direct
[64] = {
54 0, 8, 1, 9, 16, 24, 2, 10,
55 17, 25, 32, 40, 48, 56, 33, 41,
56 18, 26, 3, 11, 4, 12, 19, 27,
57 34, 42, 49, 57, 50, 58, 35, 43,
58 20, 28, 5, 13, 6, 14, 21, 29,
59 36, 44, 51, 59, 52, 60, 37, 45,
60 22, 30, 7, 15, 23, 31, 38, 46,
61 53, 61, 54, 62, 39, 47, 55, 63,
64 /* not permutated inverse zigzag_direct + 1 for MMX quantizer */
/* zero-initialized here; filled at runtime */
65 uint16_t __align8 inv_zigzag_direct16
[64] = {0, };
/* Alternate horizontal scan order for an 8x8 coefficient block (entry k is
 * the row-major index of the k-th coefficient in scan order). */
67 const uint8_t ff_alternate_horizontal_scan
[64] = {
68 0, 1, 2, 3, 8, 9, 16, 17,
69 10, 11, 4, 5, 6, 7, 15, 14,
70 13, 12, 19, 18, 24, 25, 32, 33,
71 26, 27, 20, 21, 22, 23, 28, 29,
72 30, 31, 34, 35, 40, 41, 48, 49,
73 42, 43, 36, 37, 38, 39, 44, 45,
74 46, 47, 50, 51, 56, 57, 58, 59,
75 52, 53, 54, 55, 60, 61, 62, 63,
/* Alternate vertical scan order for an 8x8 coefficient block (entry k is
 * the row-major index of the k-th coefficient in scan order). */
78 const uint8_t ff_alternate_vertical_scan
[64] = {
79 0, 8, 16, 24, 1, 9, 2, 10,
80 17, 25, 32, 40, 48, 56, 57, 49,
81 41, 33, 26, 18, 3, 11, 4, 12,
82 19, 27, 34, 42, 50, 58, 35, 43,
83 51, 59, 20, 28, 5, 13, 6, 14,
84 21, 29, 36, 44, 52, 60, 37, 45,
85 53, 61, 22, 30, 7, 15, 23, 31,
86 38, 46, 54, 62, 39, 47, 55, 63,
89 /* a*inverse[b]>>32 == a/b for all 0<=a<=65536 && 2<=b<=255 */
90 const uint32_t inverse
[256]={
91 0, 4294967295U,2147483648U,1431655766, 1073741824, 858993460, 715827883, 613566757,
92 536870912, 477218589, 429496730, 390451573, 357913942, 330382100, 306783379, 286331154,
93 268435456, 252645136, 238609295, 226050911, 214748365, 204522253, 195225787, 186737709,
94 178956971, 171798692, 165191050, 159072863, 153391690, 148102321, 143165577, 138547333,
95 134217728, 130150525, 126322568, 122713352, 119304648, 116080198, 113025456, 110127367,
96 107374183, 104755300, 102261127, 99882961, 97612894, 95443718, 93368855, 91382283,
97 89478486, 87652394, 85899346, 84215046, 82595525, 81037119, 79536432, 78090315,
98 76695845, 75350304, 74051161, 72796056, 71582789, 70409300, 69273667, 68174085,
99 67108864, 66076420, 65075263, 64103990, 63161284, 62245903, 61356676, 60492498,
100 59652324, 58835169, 58040099, 57266231, 56512728, 55778797, 55063684, 54366675,
101 53687092, 53024288, 52377650, 51746594, 51130564, 50529028, 49941481, 49367441,
102 48806447, 48258060, 47721859, 47197443, 46684428, 46182445, 45691142, 45210183,
103 44739243, 44278014, 43826197, 43383509, 42949673, 42524429, 42107523, 41698712,
104 41297763, 40904451, 40518560, 40139882, 39768216, 39403370, 39045158, 38693400,
105 38347923, 38008561, 37675152, 37347542, 37025581, 36709123, 36398028, 36092163,
106 35791395, 35495598, 35204650, 34918434, 34636834, 34359739, 34087043, 33818641,
107 33554432, 33294321, 33038210, 32786010, 32537632, 32292988, 32051995, 31814573,
108 31580642, 31350127, 31122952, 30899046, 30678338, 30460761, 30246249, 30034737,
109 29826162, 29620465, 29417585, 29217465, 29020050, 28825284, 28633116, 28443493,
110 28256364, 28071682, 27889399, 27709467, 27531842, 27356480, 27183338, 27012373,
111 26843546, 26676816, 26512144, 26349493, 26188825, 26030105, 25873297, 25718368,
112 25565282, 25414008, 25264514, 25116768, 24970741, 24826401, 24683721, 24542671,
113 24403224, 24265352, 24129030, 23994231, 23860930, 23729102, 23598722, 23469767,
114 23342214, 23216040, 23091223, 22967740, 22845571, 22724695, 22605092, 22486740,
115 22369622, 22253717, 22139007, 22025474, 21913099, 21801865, 21691755, 21582751,
116 21474837, 21367997, 21262215, 21157475, 21053762, 20951060, 20849356, 20748635,
117 20648882, 20550083, 20452226, 20355296, 20259280, 20164166, 20069941, 19976593,
118 19884108, 19792477, 19701685, 19611723, 19522579, 19434242, 19346700, 19259944,
119 19173962, 19088744, 19004281, 18920561, 18837576, 18755316, 18673771, 18592933,
120 18512791, 18433337, 18354562, 18276457, 18199014, 18122225, 18046082, 17970575,
121 17895698, 17821442, 17747799, 17674763, 17602325, 17530479, 17459217, 17388532,
122 17318417, 17248865, 17179870, 17111424, 17043522, 16976156, 16909321, 16843010,
125 /* Input permutation for the simple_idct_mmx */
/* (maps each natural coefficient position to the position expected by the
 * MMX IDCT — NOTE(review): exact layout defined by simple_idct.h, included
 * above; confirm there) */
126 static const uint8_t simple_mmx_permutation
[64]={
127 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
128 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
129 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
130 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
131 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
132 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
133 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
134 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
137 static int pix_sum_c(uint8_t * pix
, int line_size
)
142 for (i
= 0; i
< 16; i
++) {
143 for (j
= 0; j
< 16; j
+= 8) {
154 pix
+= line_size
- 16;
159 static int pix_norm1_c(uint8_t * pix
, int line_size
)
162 uint32_t *sq
= squareTbl
+ 256;
165 for (i
= 0; i
< 16; i
++) {
166 for (j
= 0; j
< 16; j
+= 8) {
177 #if LONG_MAX > 2147483647
178 register uint64_t x
=*(uint64_t*)pix
;
180 s
+= sq
[(x
>>8)&0xff];
181 s
+= sq
[(x
>>16)&0xff];
182 s
+= sq
[(x
>>24)&0xff];
183 s
+= sq
[(x
>>32)&0xff];
184 s
+= sq
[(x
>>40)&0xff];
185 s
+= sq
[(x
>>48)&0xff];
186 s
+= sq
[(x
>>56)&0xff];
188 register uint32_t x
=*(uint32_t*)pix
;
190 s
+= sq
[(x
>>8)&0xff];
191 s
+= sq
[(x
>>16)&0xff];
192 s
+= sq
[(x
>>24)&0xff];
193 x
=*(uint32_t*)(pix
+4);
195 s
+= sq
[(x
>>8)&0xff];
196 s
+= sq
[(x
>>16)&0xff];
197 s
+= sq
[(x
>>24)&0xff];
202 pix
+= line_size
- 16;
207 static void bswap_buf(uint32_t *dst
, uint32_t *src
, int w
){
210 for(i
=0; i
+8<=w
; i
+=8){
211 dst
[i
+0]= bswap_32(src
[i
+0]);
212 dst
[i
+1]= bswap_32(src
[i
+1]);
213 dst
[i
+2]= bswap_32(src
[i
+2]);
214 dst
[i
+3]= bswap_32(src
[i
+3]);
215 dst
[i
+4]= bswap_32(src
[i
+4]);
216 dst
[i
+5]= bswap_32(src
[i
+5]);
217 dst
[i
+6]= bswap_32(src
[i
+6]);
218 dst
[i
+7]= bswap_32(src
[i
+7]);
221 dst
[i
+0]= bswap_32(src
[i
+0]);
225 static int sse4_c(void *v
, uint8_t * pix1
, uint8_t * pix2
, int line_size
, int h
)
228 uint32_t *sq
= squareTbl
+ 256;
231 for (i
= 0; i
< h
; i
++) {
232 s
+= sq
[pix1
[0] - pix2
[0]];
233 s
+= sq
[pix1
[1] - pix2
[1]];
234 s
+= sq
[pix1
[2] - pix2
[2]];
235 s
+= sq
[pix1
[3] - pix2
[3]];
242 static int sse8_c(void *v
, uint8_t * pix1
, uint8_t * pix2
, int line_size
, int h
)
245 uint32_t *sq
= squareTbl
+ 256;
248 for (i
= 0; i
< h
; i
++) {
249 s
+= sq
[pix1
[0] - pix2
[0]];
250 s
+= sq
[pix1
[1] - pix2
[1]];
251 s
+= sq
[pix1
[2] - pix2
[2]];
252 s
+= sq
[pix1
[3] - pix2
[3]];
253 s
+= sq
[pix1
[4] - pix2
[4]];
254 s
+= sq
[pix1
[5] - pix2
[5]];
255 s
+= sq
[pix1
[6] - pix2
[6]];
256 s
+= sq
[pix1
[7] - pix2
[7]];
263 static int sse16_c(void *v
, uint8_t *pix1
, uint8_t *pix2
, int line_size
, int h
)
266 uint32_t *sq
= squareTbl
+ 256;
269 for (i
= 0; i
< h
; i
++) {
270 s
+= sq
[pix1
[ 0] - pix2
[ 0]];
271 s
+= sq
[pix1
[ 1] - pix2
[ 1]];
272 s
+= sq
[pix1
[ 2] - pix2
[ 2]];
273 s
+= sq
[pix1
[ 3] - pix2
[ 3]];
274 s
+= sq
[pix1
[ 4] - pix2
[ 4]];
275 s
+= sq
[pix1
[ 5] - pix2
[ 5]];
276 s
+= sq
[pix1
[ 6] - pix2
[ 6]];
277 s
+= sq
[pix1
[ 7] - pix2
[ 7]];
278 s
+= sq
[pix1
[ 8] - pix2
[ 8]];
279 s
+= sq
[pix1
[ 9] - pix2
[ 9]];
280 s
+= sq
[pix1
[10] - pix2
[10]];
281 s
+= sq
[pix1
[11] - pix2
[11]];
282 s
+= sq
[pix1
[12] - pix2
[12]];
283 s
+= sq
[pix1
[13] - pix2
[13]];
284 s
+= sq
[pix1
[14] - pix2
[14]];
285 s
+= sq
[pix1
[15] - pix2
[15]];
294 static inline int w_c(void *v
, uint8_t * pix1
, uint8_t * pix2
, int line_size
, int w
, int h
, int type
){
295 #ifdef CONFIG_SNOW_ENCODER //idwt is in snow.c
297 const int dec_count
= w
==8 ? 3 : 4;
301 static const int scale
[2][2][4][4]={
305 {268, 239, 239, 213},
310 {344, 310, 310, 280},
318 {275, 245, 245, 218},
323 {352, 317, 317, 286},
332 for (i
= 0; i
< h
; i
++) {
333 for (j
= 0; j
< w
; j
+=4) {
334 tmp
[16*i
+j
+0] = (pix1
[j
+0] - pix2
[j
+0])<<4;
335 tmp
[16*i
+j
+1] = (pix1
[j
+1] - pix2
[j
+1])<<4;
336 tmp
[16*i
+j
+2] = (pix1
[j
+2] - pix2
[j
+2])<<4;
337 tmp
[16*i
+j
+3] = (pix1
[j
+3] - pix2
[j
+3])<<4;
343 ff_spatial_dwt(tmp
, w
, h
, 16, type
, dec_count
);
347 for(level
=0; level
<dec_count
; level
++){
348 for(ori
= level
? 1 : 0; ori
<4; ori
++){
349 int sx
= (ori
&1) ? 1<<level
: 0;
350 int stride
= 16<<(dec_count
-level
);
351 int sy
= (ori
&2) ? stride
>>1 : 0;
354 for(i
=0; i
<size
; i
++){
355 for(j
=0; j
<size
; j
++){
356 int v
= tmp
[sx
+ sy
+ i
*stride
+ j
] * scale
[type
][dec_count
-3][level
][ori
];
363 for (i
= 0; i
< h
; i
++) {
364 for (j
= 0; j
< w
; j
+=4) {
365 s
+= ABS(tmp
[16*i
+j
+0]);
366 s
+= ABS(tmp
[16*i
+j
+1]);
367 s
+= ABS(tmp
[16*i
+j
+2]);
368 s
+= ABS(tmp
[16*i
+j
+3]);
/* Wavelet-based distance between two 8-wide pixel blocks: thin wrapper
 * around w_c() with fixed w=8 and type=1 (the "53" wavelet variant). */
377 static int w53_8_c(void *v
, uint8_t * pix1
, uint8_t * pix2
, int line_size
, int h
){
378 return w_c(v
, pix1
, pix2
, line_size
, 8, h
, 1);
/* Wavelet-based distance between two 8-wide pixel blocks: thin wrapper
 * around w_c() with fixed w=8 and type=0 (the "97" wavelet variant). */
381 static int w97_8_c(void *v
, uint8_t * pix1
, uint8_t * pix2
, int line_size
, int h
){
382 return w_c(v
, pix1
, pix2
, line_size
, 8, h
, 0);
/* Wavelet-based distance between two 16-wide pixel blocks: thin wrapper
 * around w_c() with fixed w=16 and type=1 (the "53" wavelet variant). */
385 static int w53_16_c(void *v
, uint8_t * pix1
, uint8_t * pix2
, int line_size
, int h
){
386 return w_c(v
, pix1
, pix2
, line_size
, 16, h
, 1);
/* Wavelet-based distance between two 16-wide pixel blocks: thin wrapper
 * around w_c() with fixed w=16 and type=0 (the "97" wavelet variant). */
389 static int w97_16_c(void *v
, uint8_t * pix1
, uint8_t * pix2
, int line_size
, int h
){
390 return w_c(v
, pix1
, pix2
, line_size
, 16, h
, 0);
393 static void get_pixels_c(DCTELEM
*restrict block
, const uint8_t *pixels
, int line_size
)
397 /* read the pixels */
399 block
[0] = pixels
[0];
400 block
[1] = pixels
[1];
401 block
[2] = pixels
[2];
402 block
[3] = pixels
[3];
403 block
[4] = pixels
[4];
404 block
[5] = pixels
[5];
405 block
[6] = pixels
[6];
406 block
[7] = pixels
[7];
412 static void diff_pixels_c(DCTELEM
*restrict block
, const uint8_t *s1
,
413 const uint8_t *s2
, int stride
){
416 /* read the pixels */
418 block
[0] = s1
[0] - s2
[0];
419 block
[1] = s1
[1] - s2
[1];
420 block
[2] = s1
[2] - s2
[2];
421 block
[3] = s1
[3] - s2
[3];
422 block
[4] = s1
[4] - s2
[4];
423 block
[5] = s1
[5] - s2
[5];
424 block
[6] = s1
[6] - s2
[6];
425 block
[7] = s1
[7] - s2
[7];
433 static void put_pixels_clamped_c(const DCTELEM
*block
, uint8_t *restrict pixels
,
437 uint8_t *cm
= cropTbl
+ MAX_NEG_CROP
;
439 /* read the pixels */
441 pixels
[0] = cm
[block
[0]];
442 pixels
[1] = cm
[block
[1]];
443 pixels
[2] = cm
[block
[2]];
444 pixels
[3] = cm
[block
[3]];
445 pixels
[4] = cm
[block
[4]];
446 pixels
[5] = cm
[block
[5]];
447 pixels
[6] = cm
[block
[6]];
448 pixels
[7] = cm
[block
[7]];
455 static void put_pixels_clamped4_c(const DCTELEM
*block
, uint8_t *restrict pixels
,
459 uint8_t *cm
= cropTbl
+ MAX_NEG_CROP
;
461 /* read the pixels */
463 pixels
[0] = cm
[block
[0]];
464 pixels
[1] = cm
[block
[1]];
465 pixels
[2] = cm
[block
[2]];
466 pixels
[3] = cm
[block
[3]];
473 static void put_pixels_clamped2_c(const DCTELEM
*block
, uint8_t *restrict pixels
,
477 uint8_t *cm
= cropTbl
+ MAX_NEG_CROP
;
479 /* read the pixels */
481 pixels
[0] = cm
[block
[0]];
482 pixels
[1] = cm
[block
[1]];
489 static void put_signed_pixels_clamped_c(const DCTELEM
*block
,
490 uint8_t *restrict pixels
,
495 for (i
= 0; i
< 8; i
++) {
496 for (j
= 0; j
< 8; j
++) {
499 else if (*block
> 127)
502 *pixels
= (uint8_t)(*block
+ 128);
506 pixels
+= (line_size
- 8);
510 static void add_pixels_clamped_c(const DCTELEM
*block
, uint8_t *restrict pixels
,
514 uint8_t *cm
= cropTbl
+ MAX_NEG_CROP
;
516 /* read the pixels */
518 pixels
[0] = cm
[pixels
[0] + block
[0]];
519 pixels
[1] = cm
[pixels
[1] + block
[1]];
520 pixels
[2] = cm
[pixels
[2] + block
[2]];
521 pixels
[3] = cm
[pixels
[3] + block
[3]];
522 pixels
[4] = cm
[pixels
[4] + block
[4]];
523 pixels
[5] = cm
[pixels
[5] + block
[5]];
524 pixels
[6] = cm
[pixels
[6] + block
[6]];
525 pixels
[7] = cm
[pixels
[7] + block
[7]];
531 static void add_pixels_clamped4_c(const DCTELEM
*block
, uint8_t *restrict pixels
,
535 uint8_t *cm
= cropTbl
+ MAX_NEG_CROP
;
537 /* read the pixels */
539 pixels
[0] = cm
[pixels
[0] + block
[0]];
540 pixels
[1] = cm
[pixels
[1] + block
[1]];
541 pixels
[2] = cm
[pixels
[2] + block
[2]];
542 pixels
[3] = cm
[pixels
[3] + block
[3]];
548 static void add_pixels_clamped2_c(const DCTELEM
*block
, uint8_t *restrict pixels
,
552 uint8_t *cm
= cropTbl
+ MAX_NEG_CROP
;
554 /* read the pixels */
556 pixels
[0] = cm
[pixels
[0] + block
[0]];
557 pixels
[1] = cm
[pixels
[1] + block
[1]];
563 static void add_pixels8_c(uint8_t *restrict pixels
, DCTELEM
*block
, int line_size
)
567 pixels
[0] += block
[0];
568 pixels
[1] += block
[1];
569 pixels
[2] += block
[2];
570 pixels
[3] += block
[3];
571 pixels
[4] += block
[4];
572 pixels
[5] += block
[5];
573 pixels
[6] += block
[6];
574 pixels
[7] += block
[7];
580 static void add_pixels4_c(uint8_t *restrict pixels
, DCTELEM
*block
, int line_size
)
584 pixels
[0] += block
[0];
585 pixels
[1] += block
[1];
586 pixels
[2] += block
[2];
587 pixels
[3] += block
[3];
595 #define PIXOP2(OPNAME, OP) \
596 static void OPNAME ## _pixels(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
600 OP(*((uint64_t*)block), LD64(pixels));\
606 static void OPNAME ## _no_rnd_pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
610 const uint64_t a= LD64(pixels );\
611 const uint64_t b= LD64(pixels+1);\
612 OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
618 static void OPNAME ## _pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
622 const uint64_t a= LD64(pixels );\
623 const uint64_t b= LD64(pixels+1);\
624 OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
630 static void OPNAME ## _no_rnd_pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
634 const uint64_t a= LD64(pixels );\
635 const uint64_t b= LD64(pixels+line_size);\
636 OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
642 static void OPNAME ## _pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
646 const uint64_t a= LD64(pixels );\
647 const uint64_t b= LD64(pixels+line_size);\
648 OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
654 static void OPNAME ## _pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
657 const uint64_t a= LD64(pixels );\
658 const uint64_t b= LD64(pixels+1);\
659 uint64_t l0= (a&0x0303030303030303ULL)\
660 + (b&0x0303030303030303ULL)\
661 + 0x0202020202020202ULL;\
662 uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
663 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
667 for(i=0; i<h; i+=2){\
668 uint64_t a= LD64(pixels );\
669 uint64_t b= LD64(pixels+1);\
670 l1= (a&0x0303030303030303ULL)\
671 + (b&0x0303030303030303ULL);\
672 h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
673 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
674 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
679 l0= (a&0x0303030303030303ULL)\
680 + (b&0x0303030303030303ULL)\
681 + 0x0202020202020202ULL;\
682 h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
683 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
684 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
690 static void OPNAME ## _no_rnd_pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
693 const uint64_t a= LD64(pixels );\
694 const uint64_t b= LD64(pixels+1);\
695 uint64_t l0= (a&0x0303030303030303ULL)\
696 + (b&0x0303030303030303ULL)\
697 + 0x0101010101010101ULL;\
698 uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
699 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
703 for(i=0; i<h; i+=2){\
704 uint64_t a= LD64(pixels );\
705 uint64_t b= LD64(pixels+1);\
706 l1= (a&0x0303030303030303ULL)\
707 + (b&0x0303030303030303ULL);\
708 h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
709 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
710 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
715 l0= (a&0x0303030303030303ULL)\
716 + (b&0x0303030303030303ULL)\
717 + 0x0101010101010101ULL;\
718 h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
719 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
720 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
726 CALL_2X_PIXELS(OPNAME ## _pixels16_c , OPNAME ## _pixels_c , 8)\
727 CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels_x2_c , 8)\
728 CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels_y2_c , 8)\
729 CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels_xy2_c, 8)\
730 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels_x2_c , 8)\
731 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels_y2_c , 8)\
732 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels_xy2_c, 8)
734 #define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEFEFEFEFEULL)>>1) )
735 #else // 64 bit variant
737 #define PIXOP2(OPNAME, OP) \
738 static void OPNAME ## _pixels2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
741 OP(*((uint16_t*)(block )), LD16(pixels ));\
746 static void OPNAME ## _pixels4_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
749 OP(*((uint32_t*)(block )), LD32(pixels ));\
754 static void OPNAME ## _pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
757 OP(*((uint32_t*)(block )), LD32(pixels ));\
758 OP(*((uint32_t*)(block+4)), LD32(pixels+4));\
763 static inline void OPNAME ## _no_rnd_pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
764 OPNAME ## _pixels8_c(block, pixels, line_size, h);\
767 static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
768 int src_stride1, int src_stride2, int h){\
772 a= LD32(&src1[i*src_stride1 ]);\
773 b= LD32(&src2[i*src_stride2 ]);\
774 OP(*((uint32_t*)&dst[i*dst_stride ]), no_rnd_avg32(a, b));\
775 a= LD32(&src1[i*src_stride1+4]);\
776 b= LD32(&src2[i*src_stride2+4]);\
777 OP(*((uint32_t*)&dst[i*dst_stride+4]), no_rnd_avg32(a, b));\
781 static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
782 int src_stride1, int src_stride2, int h){\
786 a= LD32(&src1[i*src_stride1 ]);\
787 b= LD32(&src2[i*src_stride2 ]);\
788 OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\
789 a= LD32(&src1[i*src_stride1+4]);\
790 b= LD32(&src2[i*src_stride2+4]);\
791 OP(*((uint32_t*)&dst[i*dst_stride+4]), rnd_avg32(a, b));\
795 static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
796 int src_stride1, int src_stride2, int h){\
800 a= LD32(&src1[i*src_stride1 ]);\
801 b= LD32(&src2[i*src_stride2 ]);\
802 OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\
806 static inline void OPNAME ## _pixels2_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
807 int src_stride1, int src_stride2, int h){\
811 a= LD16(&src1[i*src_stride1 ]);\
812 b= LD16(&src2[i*src_stride2 ]);\
813 OP(*((uint16_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\
817 static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
818 int src_stride1, int src_stride2, int h){\
819 OPNAME ## _pixels8_l2(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\
820 OPNAME ## _pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
823 static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
824 int src_stride1, int src_stride2, int h){\
825 OPNAME ## _no_rnd_pixels8_l2(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\
826 OPNAME ## _no_rnd_pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
829 static inline void OPNAME ## _no_rnd_pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
830 OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
833 static inline void OPNAME ## _pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
834 OPNAME ## _pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
837 static inline void OPNAME ## _no_rnd_pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
838 OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
841 static inline void OPNAME ## _pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
842 OPNAME ## _pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
845 static inline void OPNAME ## _pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
846 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
849 uint32_t a, b, c, d, l0, l1, h0, h1;\
850 a= LD32(&src1[i*src_stride1]);\
851 b= LD32(&src2[i*src_stride2]);\
852 c= LD32(&src3[i*src_stride3]);\
853 d= LD32(&src4[i*src_stride4]);\
854 l0= (a&0x03030303UL)\
857 h0= ((a&0xFCFCFCFCUL)>>2)\
858 + ((b&0xFCFCFCFCUL)>>2);\
859 l1= (c&0x03030303UL)\
861 h1= ((c&0xFCFCFCFCUL)>>2)\
862 + ((d&0xFCFCFCFCUL)>>2);\
863 OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
864 a= LD32(&src1[i*src_stride1+4]);\
865 b= LD32(&src2[i*src_stride2+4]);\
866 c= LD32(&src3[i*src_stride3+4]);\
867 d= LD32(&src4[i*src_stride4+4]);\
868 l0= (a&0x03030303UL)\
871 h0= ((a&0xFCFCFCFCUL)>>2)\
872 + ((b&0xFCFCFCFCUL)>>2);\
873 l1= (c&0x03030303UL)\
875 h1= ((c&0xFCFCFCFCUL)>>2)\
876 + ((d&0xFCFCFCFCUL)>>2);\
877 OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
881 static inline void OPNAME ## _pixels4_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
882 OPNAME ## _pixels4_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
885 static inline void OPNAME ## _pixels4_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
886 OPNAME ## _pixels4_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
889 static inline void OPNAME ## _pixels2_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
890 OPNAME ## _pixels2_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
893 static inline void OPNAME ## _pixels2_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
894 OPNAME ## _pixels2_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
897 static inline void OPNAME ## _no_rnd_pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
898 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
901 uint32_t a, b, c, d, l0, l1, h0, h1;\
902 a= LD32(&src1[i*src_stride1]);\
903 b= LD32(&src2[i*src_stride2]);\
904 c= LD32(&src3[i*src_stride3]);\
905 d= LD32(&src4[i*src_stride4]);\
906 l0= (a&0x03030303UL)\
909 h0= ((a&0xFCFCFCFCUL)>>2)\
910 + ((b&0xFCFCFCFCUL)>>2);\
911 l1= (c&0x03030303UL)\
913 h1= ((c&0xFCFCFCFCUL)>>2)\
914 + ((d&0xFCFCFCFCUL)>>2);\
915 OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
916 a= LD32(&src1[i*src_stride1+4]);\
917 b= LD32(&src2[i*src_stride2+4]);\
918 c= LD32(&src3[i*src_stride3+4]);\
919 d= LD32(&src4[i*src_stride4+4]);\
920 l0= (a&0x03030303UL)\
923 h0= ((a&0xFCFCFCFCUL)>>2)\
924 + ((b&0xFCFCFCFCUL)>>2);\
925 l1= (c&0x03030303UL)\
927 h1= ((c&0xFCFCFCFCUL)>>2)\
928 + ((d&0xFCFCFCFCUL)>>2);\
929 OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
932 static inline void OPNAME ## _pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
933 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
934 OPNAME ## _pixels8_l4(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
935 OPNAME ## _pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
937 static inline void OPNAME ## _no_rnd_pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
938 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
939 OPNAME ## _no_rnd_pixels8_l4(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
940 OPNAME ## _no_rnd_pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
943 static inline void OPNAME ## _pixels2_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
945 int i, a0, b0, a1, b1;\
952 for(i=0; i<h; i+=2){\
958 block[0]= (a1+a0)>>2; /* FIXME non put */\
959 block[1]= (b1+b0)>>2;\
969 block[0]= (a1+a0)>>2;\
970 block[1]= (b1+b0)>>2;\
976 static inline void OPNAME ## _pixels4_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
979 const uint32_t a= LD32(pixels );\
980 const uint32_t b= LD32(pixels+1);\
981 uint32_t l0= (a&0x03030303UL)\
984 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
985 + ((b&0xFCFCFCFCUL)>>2);\
989 for(i=0; i<h; i+=2){\
990 uint32_t a= LD32(pixels );\
991 uint32_t b= LD32(pixels+1);\
992 l1= (a&0x03030303UL)\
994 h1= ((a&0xFCFCFCFCUL)>>2)\
995 + ((b&0xFCFCFCFCUL)>>2);\
996 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
1001 l0= (a&0x03030303UL)\
1004 h0= ((a&0xFCFCFCFCUL)>>2)\
1005 + ((b&0xFCFCFCFCUL)>>2);\
1006 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
1012 static inline void OPNAME ## _pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
1015 for(j=0; j<2; j++){\
1017 const uint32_t a= LD32(pixels );\
1018 const uint32_t b= LD32(pixels+1);\
1019 uint32_t l0= (a&0x03030303UL)\
1022 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
1023 + ((b&0xFCFCFCFCUL)>>2);\
1027 for(i=0; i<h; i+=2){\
1028 uint32_t a= LD32(pixels );\
1029 uint32_t b= LD32(pixels+1);\
1030 l1= (a&0x03030303UL)\
1031 + (b&0x03030303UL);\
1032 h1= ((a&0xFCFCFCFCUL)>>2)\
1033 + ((b&0xFCFCFCFCUL)>>2);\
1034 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
1039 l0= (a&0x03030303UL)\
1042 h0= ((a&0xFCFCFCFCUL)>>2)\
1043 + ((b&0xFCFCFCFCUL)>>2);\
1044 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
1048 pixels+=4-line_size*(h+1);\
1049 block +=4-line_size*h;\
1053 static inline void OPNAME ## _no_rnd_pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
1056 for(j=0; j<2; j++){\
1058 const uint32_t a= LD32(pixels );\
1059 const uint32_t b= LD32(pixels+1);\
1060 uint32_t l0= (a&0x03030303UL)\
1063 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
1064 + ((b&0xFCFCFCFCUL)>>2);\
1068 for(i=0; i<h; i+=2){\
1069 uint32_t a= LD32(pixels );\
1070 uint32_t b= LD32(pixels+1);\
1071 l1= (a&0x03030303UL)\
1072 + (b&0x03030303UL);\
1073 h1= ((a&0xFCFCFCFCUL)>>2)\
1074 + ((b&0xFCFCFCFCUL)>>2);\
1075 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
1080 l0= (a&0x03030303UL)\
1083 h0= ((a&0xFCFCFCFCUL)>>2)\
1084 + ((b&0xFCFCFCFCUL)>>2);\
1085 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
1089 pixels+=4-line_size*(h+1);\
1090 block +=4-line_size*h;\
1094 CALL_2X_PIXELS(OPNAME ## _pixels16_c , OPNAME ## _pixels8_c , 8)\
1095 CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels8_x2_c , 8)\
1096 CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels8_y2_c , 8)\
1097 CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels8_xy2_c, 8)\
1098 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_c , OPNAME ## _pixels8_c , 8)\
1099 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels8_x2_c , 8)\
1100 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels8_y2_c , 8)\
1101 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels8_xy2_c, 8)\
/* OP macros plugged into the PIXOP2 template above: op_avg folds the new
 * 32-bit value into the destination lvalue with a rounded average,
 * op_put simply overwrites it. `a` must be an lvalue. */
1103 #define op_avg(a, b) a = rnd_avg32(a, b)
1105 #define op_put(a, b) a = b
/* Rounded averages of 2 and 4 pixel values, used by the pel-interpolation
 * code below: avg2 rounds (a+b)/2 up, avg4 rounds (a+b+c+d)/4 to nearest.
 * Arguments are fully parenthesized so a caller may pass a compound
 * expression (e.g. avg2(x & 1, y)) without operator-precedence surprises;
 * the previous ((a+b+1)>>1) form mis-bound any argument containing an
 * operator of lower precedence than '+'. All plain-identifier call sites
 * expand exactly as before. */
#define avg2(a, b)       (((a) + (b) + 1) >> 1)
#define avg4(a, b, c, d) (((a) + (b) + (c) + (d) + 2) >> 2)
/* Adapter exposing put_no_rnd_pixels16_l2 through the single-stride
 * signature used in the function-pointer tables: the same stride is used
 * for dst and both sources. */
1115 static void put_no_rnd_pixels16_l2_c(uint8_t *dst
, const uint8_t *a
, const uint8_t *b
, int stride
, int h
){
1116 put_no_rnd_pixels16_l2(dst
, a
, b
, stride
, stride
, stride
, h
);
/* Adapter exposing put_no_rnd_pixels8_l2 through the single-stride
 * signature used in the function-pointer tables: the same stride is used
 * for dst and both sources. */
1119 static void put_no_rnd_pixels8_l2_c(uint8_t *dst
, const uint8_t *a
, const uint8_t *b
, int stride
, int h
){
1120 put_no_rnd_pixels8_l2(dst
, a
, b
, stride
, stride
, stride
, h
);
1123 static void gmc1_c(uint8_t *dst
, uint8_t *src
, int stride
, int h
, int x16
, int y16
, int rounder
)
1125 const int A
=(16-x16
)*(16-y16
);
1126 const int B
=( x16
)*(16-y16
);
1127 const int C
=(16-x16
)*( y16
);
1128 const int D
=( x16
)*( y16
);
1133 dst
[0]= (A
*src
[0] + B
*src
[1] + C
*src
[stride
+0] + D
*src
[stride
+1] + rounder
)>>8;
1134 dst
[1]= (A
*src
[1] + B
*src
[2] + C
*src
[stride
+1] + D
*src
[stride
+2] + rounder
)>>8;
1135 dst
[2]= (A
*src
[2] + B
*src
[3] + C
*src
[stride
+2] + D
*src
[stride
+3] + rounder
)>>8;
1136 dst
[3]= (A
*src
[3] + B
*src
[4] + C
*src
[stride
+3] + D
*src
[stride
+4] + rounder
)>>8;
1137 dst
[4]= (A
*src
[4] + B
*src
[5] + C
*src
[stride
+4] + D
*src
[stride
+5] + rounder
)>>8;
1138 dst
[5]= (A
*src
[5] + B
*src
[6] + C
*src
[stride
+5] + D
*src
[stride
+6] + rounder
)>>8;
1139 dst
[6]= (A
*src
[6] + B
*src
[7] + C
*src
[stride
+6] + D
*src
[stride
+7] + rounder
)>>8;
1140 dst
[7]= (A
*src
[7] + B
*src
[8] + C
*src
[stride
+7] + D
*src
[stride
+8] + rounder
)>>8;
1146 static void gmc_c(uint8_t *dst
, uint8_t *src
, int stride
, int h
, int ox
, int oy
,
1147 int dxx
, int dxy
, int dyx
, int dyy
, int shift
, int r
, int width
, int height
)
1150 const int s
= 1<<shift
;
1160 for(x
=0; x
<8; x
++){ //XXX FIXME optimize
1161 int src_x
, src_y
, frac_x
, frac_y
, index
;
1165 frac_x
= src_x
&(s
-1);
1166 frac_y
= src_y
&(s
-1);
1170 if((unsigned)src_x
< width
){
1171 if((unsigned)src_y
< height
){
1172 index
= src_x
+ src_y
*stride
;
1173 dst
[y
*stride
+ x
]= ( ( src
[index
]*(s
-frac_x
)
1174 + src
[index
+1]* frac_x
)*(s
-frac_y
)
1175 + ( src
[index
+stride
]*(s
-frac_x
)
1176 + src
[index
+stride
+1]* frac_x
)* frac_y
1179 index
= src_x
+ clip(src_y
, 0, height
)*stride
;
1180 dst
[y
*stride
+ x
]= ( ( src
[index
]*(s
-frac_x
)
1181 + src
[index
+1]* frac_x
)*s
1185 if((unsigned)src_y
< height
){
1186 index
= clip(src_x
, 0, width
) + src_y
*stride
;
1187 dst
[y
*stride
+ x
]= ( ( src
[index
]*(s
-frac_y
)
1188 + src
[index
+stride
]* frac_y
)*s
1191 index
= clip(src_x
, 0, width
) + clip(src_y
, 0, height
)*stride
;
1192 dst
[y
*stride
+ x
]= src
[index
];
/**
 * Thirdpel MC, integer position: plain block copy dispatched to the
 * width-specific pixel copy routine (width must be 2, 4, 8 or 16).
 */
static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    switch(width){
    case 2: put_pixels2_c (dst, src, stride, height); break;
    case 4: put_pixels4_c (dst, src, stride, height); break;
    case 8: put_pixels8_c (dst, src, stride, height); break;
    case 16:put_pixels16_c(dst, src, stride, height); break;
    }
}
/**
 * Thirdpel MC, horizontal 1/3 position (SVQ3):
 * dst[j] = round((2*src[j] + src[j+1]) / 3), done in fixed point with
 * 683 ~= 2048/3.  dst and src advance by stride per row.
 */
static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (683*(2*src[j] + src[j+1] + 1)) >> 11;
        }
        src += stride;
        dst += stride;
    }
}
/**
 * Thirdpel MC, horizontal 2/3 position (SVQ3):
 * dst[j] = round((src[j] + 2*src[j+1]) / 3) in fixed point (683 ~= 2048/3).
 */
static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (683*(src[j] + 2*src[j+1] + 1)) >> 11;
        }
        src += stride;
        dst += stride;
    }
}
/**
 * Thirdpel MC, vertical 1/3 position (SVQ3):
 * dst[j] = round((2*src[j] + src[j+stride]) / 3) in fixed point.
 */
static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (683*(2*src[j] + src[j+stride] + 1)) >> 11;
        }
        src += stride;
        dst += stride;
    }
}
/**
 * Thirdpel MC, (1/3, 1/3) position (SVQ3): weighted 4-tap bilinear-style
 * average with weights 4/3/3/2 over the 2x2 neighbourhood, in fixed
 * point (2731 ~= 32768/12).
 */
static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15;
        }
        src += stride;
        dst += stride;
    }
}
/**
 * Thirdpel MC, (1/3, 2/3) position (SVQ3): 2x2 weighted average with
 * weights 3/2/4/3, in fixed point (2731 ~= 32768/12).
 */
static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
        }
        src += stride;
        dst += stride;
    }
}
/**
 * Thirdpel MC, vertical 2/3 position (SVQ3):
 * dst[j] = round((src[j] + 2*src[j+stride]) / 3) in fixed point.
 */
static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (683*(src[j] + 2*src[j+stride] + 1)) >> 11;
        }
        src += stride;
        dst += stride;
    }
}
/**
 * Thirdpel MC, (2/3, 1/3) position (SVQ3): 2x2 weighted average with
 * weights 3/4/2/3, in fixed point (2731 ~= 32768/12).
 */
static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
        }
        src += stride;
        dst += stride;
    }
}
/**
 * Thirdpel MC, (2/3, 2/3) position (SVQ3): 2x2 weighted average with
 * weights 2/3/3/4, in fixed point (2731 ~= 32768/12).
 */
static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15;
        }
        src += stride;
        dst += stride;
    }
}
/**
 * Thirdpel MC with averaging, integer position: dispatch to the
 * width-specific averaging copy (width must be 2, 4, 8 or 16).
 */
static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    switch(width){
    case 2: avg_pixels2_c (dst, src, stride, height); break;
    case 4: avg_pixels4_c (dst, src, stride, height); break;
    case 8: avg_pixels8_c (dst, src, stride, height); break;
    case 16:avg_pixels16_c(dst, src, stride, height); break;
    }
}
/**
 * Thirdpel MC, horizontal 1/3 position, averaged with the existing
 * dst contents (rounded-up average).
 */
static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (dst[j] + ((683*(2*src[j] + src[j+1] + 1)) >> 11) + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
/**
 * Thirdpel MC, horizontal 2/3 position, averaged with dst.
 */
static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+1] + 1)) >> 11) + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
/**
 * Thirdpel MC, vertical 1/3 position, averaged with dst.
 */
static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (dst[j] + ((683*(2*src[j] + src[j+stride] + 1)) >> 11) + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
/**
 * Thirdpel MC, (1/3, 1/3) position (weights 4/3/3/2), averaged with dst.
 */
static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (dst[j] + ((2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
/**
 * Thirdpel MC, (1/3, 2/3) position (weights 3/2/4/3), averaged with dst.
 */
static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (dst[j] + ((2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
/**
 * Thirdpel MC, vertical 2/3 position, averaged with dst.
 */
static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+stride] + 1)) >> 11) + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
/**
 * Thirdpel MC, (2/3, 1/3) position (weights 3/4/2/3), averaged with dst.
 */
static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (dst[j] + ((2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
/**
 * Thirdpel MC, (2/3, 2/3) position (weights 2/3/3/4), averaged with dst.
 */
static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (dst[j] + ((2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
/**
 * Instantiates fixed-width thirdpel wrappers with the standard
 * (dst, src, stride, height) MC signature around the generic
 * put_tpel_pixels_mcXY_c routines.
 * FIX: the original bodies read
 *     void put_tpel_pixels_mcXY_c(dst, src, stride, width, height);
 * which is an (old-style, non-conforming) function *declaration*, not a
 * call, so every generated wrapper compiled to a no-op.  The stray
 * "void" is removed so the generic routine is actually invoked.
 */
#define TPEL_WIDTH(width)\
static void put_tpel_pixels ## width ## _mc00_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc00_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc10_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc10_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc20_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc20_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc01_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc01_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc11_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc11_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc21_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc21_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc02_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc02_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc12_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc12_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc22_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc22_c(dst, src, stride, width, height);}
/**
 * Generates the H.264 chroma MC functions (2, 4 and 8 pixels wide) for
 * one store operation OP.  A/B/C/D are the standard H.264 bilinear
 * chroma weights derived from the 1/8-pel fractional offsets (x,y);
 * the OP macro handles the +32 rounding and >>6 normalization.
 * NOTE(review): loop/advance lines reconstructed from a mangled
 * extraction; per-sample arithmetic is unchanged.
 */
#define H264_CHROMA_MC(OPNAME, OP)\
static void OPNAME ## h264_chroma_mc2_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=(  x)*(8-y);\
    const int C=(8-x)*(  y);\
    const int D=(  x)*(  y);\
    int i;\
    assert(x<8 && y<8 && x>=0 && y>=0);\
    for(i=0; i<h; i++)\
    {\
        OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
        OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
        dst+= stride;\
        src+= stride;\
    }\
}\
static void OPNAME ## h264_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=(  x)*(8-y);\
    const int C=(8-x)*(  y);\
    const int D=(  x)*(  y);\
    int i;\
    assert(x<8 && y<8 && x>=0 && y>=0);\
    for(i=0; i<h; i++)\
    {\
        OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
        OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
        OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
        OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
        dst+= stride;\
        src+= stride;\
    }\
}\
static void OPNAME ## h264_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=(  x)*(8-y);\
    const int C=(8-x)*(  y);\
    const int D=(  x)*(  y);\
    int i;\
    assert(x<8 && y<8 && x>=0 && y>=0);\
    for(i=0; i<h; i++)\
    {\
        OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
        OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
        OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
        OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
        OP(dst[4], (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5]));\
        OP(dst[5], (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6]));\
        OP(dst[6], (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7]));\
        OP(dst[7], (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8]));\
        dst+= stride;\
        src+= stride;\
    }\
}
1482 #define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1)
1483 #define op_put(a, b) a = (((b) + 32)>>6)
1485 H264_CHROMA_MC(put_
, op_put
)
1486 H264_CHROMA_MC(avg_
, op_avg
)
/**
 * Copy a 4-byte-wide, h-row block; LD32/ST32 are the project's
 * (possibly unaligned) 32-bit load/store macros.
 */
static inline void copy_block4(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    int i;
    for(i=0; i<h; i++)
    {
        ST32(dst   , LD32(src   ));
        dst+=dstStride;
        src+=srcStride;
    }
}
/**
 * Copy an 8-byte-wide, h-row block as two 32-bit words per row.
 */
static inline void copy_block8(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    int i;
    for(i=0; i<h; i++)
    {
        ST32(dst   , LD32(src   ));
        ST32(dst+4 , LD32(src+4 ));
        dst+=dstStride;
        src+=srcStride;
    }
}
/**
 * Copy a 16-byte-wide, h-row block as four 32-bit words per row.
 */
static inline void copy_block16(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    int i;
    for(i=0; i<h; i++)
    {
        ST32(dst   , LD32(src   ));
        ST32(dst+4 , LD32(src+4 ));
        ST32(dst+8 , LD32(src+8 ));
        ST32(dst+12, LD32(src+12));
        dst+=dstStride;
        src+=srcStride;
    }
}
/**
 * Copy a 17-byte-wide, h-row block (16+1 edge column needed by the
 * 16-pel qpel filters): four 32-bit words plus one trailing byte.
 */
static inline void copy_block17(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    int i;
    for(i=0; i<h; i++)
    {
        ST32(dst   , LD32(src   ));
        ST32(dst+4 , LD32(src+4 ));
        ST32(dst+8 , LD32(src+8 ));
        ST32(dst+12, LD32(src+12));
        dst[16]= src[16];
        dst+=dstStride;
        src+=srcStride;
    }
}
/**
 * Copy a 9-byte-wide, h-row block (8+1 edge column needed by the
 * 8-pel qpel filters): two 32-bit words plus one trailing byte.
 */
static inline void copy_block9(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    int i;
    for(i=0; i<h; i++)
    {
        ST32(dst   , LD32(src   ));
        ST32(dst+4 , LD32(src+4 ));
        dst[8]= src[8];
        dst+=dstStride;
        src+=srcStride;
    }
}
/**
 * Generates the complete family of MPEG-4 quarter-pel MC functions
 * (8x8 and 16x16, all 16 sub-pel positions) for one store operation.
 * OPNAME prefixes the emitted functions, RND selects the rounding
 * variant of the put-helpers used for intermediates, OP stores one
 * sample (clipping through cm).  r is kept for interface compatibility.
 * The _old_c variants are the reference (4-plane average) versions.
 * NOTE(review): loops, brace structure and intermediate-buffer
 * declarations reconstructed from a mangled extraction; all filter-tap
 * lines are as found.
 */
#define QPEL_MC(r, OPNAME, RND, OP) \
static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<h; i++)\
    {\
        OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\
        OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\
        OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\
        OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\
        OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\
        OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\
        OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\
        OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int w=8;\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<w; i++)\
    {\
        const int src0= src[0*srcStride];\
        const int src1= src[1*srcStride];\
        const int src2= src[2*srcStride];\
        const int src3= src[3*srcStride];\
        const int src4= src[4*srcStride];\
        const int src5= src[5*srcStride];\
        const int src6= src[6*srcStride];\
        const int src7= src[7*srcStride];\
        const int src8= src[8*srcStride];\
        OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
        OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
        OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
        OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
        OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
        OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
        OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
        dst++;\
        src++;\
    }\
}\
static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<h; i++)\
    {\
        OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\
        OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\
        OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\
        OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\
        OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\
        OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\
        OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\
        OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\
        OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\
        OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\
        OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\
        OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\
        OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\
        OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\
        OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\
        OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    const int w=16;\
    int i;\
    for(i=0; i<w; i++)\
    {\
        const int src0= src[0*srcStride];\
        const int src1= src[1*srcStride];\
        const int src2= src[2*srcStride];\
        const int src3= src[3*srcStride];\
        const int src4= src[4*srcStride];\
        const int src5= src[5*srcStride];\
        const int src6= src[6*srcStride];\
        const int src7= src[7*srcStride];\
        const int src8= src[8*srcStride];\
        const int src9= src[9*srcStride];\
        const int src10= src[10*srcStride];\
        const int src11= src[11*srcStride];\
        const int src12= src[12*srcStride];\
        const int src13= src[13*srcStride];\
        const int src14= src[14*srcStride];\
        const int src15= src[15*srcStride];\
        const int src16= src[16*srcStride];\
        OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
        OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
        OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
        OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
        OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
        OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
        OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
        OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
        OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
        OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
        OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
        OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
        OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
        OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
        OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
        OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
        dst++;\
        src++;\
    }\
}\
static void OPNAME ## qpel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## pixels8_c(dst, src, stride, 8);\
}\
static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[64];\
    put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
    OPNAME ## pixels8_l2(dst, src, half, stride, stride, 8, 8);\
}\
static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
}\
static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[64];\
    put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
    OPNAME ## pixels8_l2(dst, src+1, half, stride, stride, 8, 8);\
}\
static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t half[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
    OPNAME ## pixels8_l2(dst, full, half, stride, 16, 8, 8);\
}\
static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    copy_block9(full, src, 16, stride, 9);\
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
}\
static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t half[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
    OPNAME ## pixels8_l2(dst, full+16, half, stride, 16, 8, 8);\
}\
void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfV[64];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l4(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
}\
void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfV[64];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l4(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
}\
void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfV[64];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l4(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
}\
void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfV[64];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full  , 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l4(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t halfH[72];\
    uint8_t halfHV[64];\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t halfH[72];\
    uint8_t halfHV[64];\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
}\
void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfV[64];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
}\
void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfV[64];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
}\
static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t halfH[72];\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
}\
static void OPNAME ## qpel16_mc00_c (uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## pixels16_c(dst, src, stride, 16);\
}\
static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[256];\
    put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
    OPNAME ## pixels16_l2(dst, src, half, stride, stride, 16, 16);\
}\
static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
}\
static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[256];\
    put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
    OPNAME ## pixels16_l2(dst, src+1, half, stride, stride, 16, 16);\
}\
static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t half[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
    OPNAME ## pixels16_l2(dst, full, half, stride, 24, 16, 16);\
}\
static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    copy_block17(full, src, 24, stride, 17);\
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
}\
static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t half[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
    OPNAME ## pixels16_l2(dst, full+24, half, stride, 24, 16, 16);\
}\
void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfV[256];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l4(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
}\
void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfV[256];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l4(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
}\
void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfV[256];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l4(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
}\
void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfV[256];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full  , 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l4(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t halfH[272];\
    uint8_t halfHV[256];\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t halfH[272];\
    uint8_t halfHV[256];\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
}\
void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfV[256];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
}\
void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfV[256];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
}\
static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t halfH[272];\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
}
2039 #define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
2040 #define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
2041 #define op_put(a, b) a = cm[((b) + 16)>>5]
2042 #define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]
2044 QPEL_MC(0, put_
, _
, op_put
)
2045 QPEL_MC(1, put_no_rnd_
, _no_rnd_
, op_put_no_rnd
)
2046 QPEL_MC(0, avg_
, _
, op_avg
)
2047 //QPEL_MC(1, avg_no_rnd , _ , op_avg)
2049 #undef op_avg_no_rnd
2051 #undef op_put_no_rnd
/* H.264 6-tap (1,-5,20,20,-5,1) half-pel interpolation filters.
 * OP stores with >>5 rounding (h/v passes), OP2 with >>10 (hv pass,
 * which keeps 16-bit intermediates in tmp). Inner loops replace the
 * per-pixel unrolling of the original; arithmetic is identical. */
#define H264_LOWPASS(OPNAME, OP, OP2) \
static void OPNAME ## h264_qpel4_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int h=4;\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<h; i++)\
    {\
        int j;\
        for(j=0; j<4; j++)\
            OP(dst[j], (src[j]+src[j+1])*20 - (src[j-1]+src[j+2])*5 + (src[j-2]+src[j+3]));\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
\
static void OPNAME ## h264_qpel4_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int w=4;\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<w; i++)\
    {\
        int j;\
        for(j=0; j<4; j++)\
            OP(dst[j*dstStride], (src[j*srcStride]+src[(j+1)*srcStride])*20 - (src[(j-1)*srcStride]+src[(j+2)*srcStride])*5 + (src[(j-2)*srcStride]+src[(j+3)*srcStride]));\
        dst++;\
        src++;\
    }\
}\
\
static void OPNAME ## h264_qpel4_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    const int h=4;\
    const int w=4;\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int i;\
    src -= 2*srcStride;\
    for(i=0; i<h+5; i++)\
    {\
        int j;\
        for(j=0; j<4; j++)\
            tmp[j]= (src[j]+src[j+1])*20 - (src[j-1]+src[j+2])*5 + (src[j-2]+src[j+3]);\
        tmp+=tmpStride;\
        src+=srcStride;\
    }\
    tmp -= tmpStride*(h+5-2);\
    for(i=0; i<w; i++)\
    {\
        int j;\
        for(j=0; j<4; j++)\
            OP2(dst[j*dstStride], (tmp[j*tmpStride]+tmp[(j+1)*tmpStride])*20 - (tmp[(j-1)*tmpStride]+tmp[(j+2)*tmpStride])*5 + (tmp[(j-2)*tmpStride]+tmp[(j+3)*tmpStride]));\
        dst++;\
        tmp++;\
    }\
}\
\
static void OPNAME ## h264_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int h=8;\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<h; i++)\
    {\
        int j;\
        for(j=0; j<8; j++)\
            OP(dst[j], (src[j]+src[j+1])*20 - (src[j-1]+src[j+2])*5 + (src[j-2]+src[j+3]));\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
\
static void OPNAME ## h264_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int w=8;\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<w; i++)\
    {\
        int j;\
        for(j=0; j<8; j++)\
            OP(dst[j*dstStride], (src[j*srcStride]+src[(j+1)*srcStride])*20 - (src[(j-1)*srcStride]+src[(j+2)*srcStride])*5 + (src[(j-2)*srcStride]+src[(j+3)*srcStride]));\
        dst++;\
        src++;\
    }\
}\
\
static void OPNAME ## h264_qpel8_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    const int h=8;\
    const int w=8;\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int i;\
    src -= 2*srcStride;\
    for(i=0; i<h+5; i++)\
    {\
        int j;\
        for(j=0; j<8; j++)\
            tmp[j]= (src[j]+src[j+1])*20 - (src[j-1]+src[j+2])*5 + (src[j-2]+src[j+3]);\
        tmp+=tmpStride;\
        src+=srcStride;\
    }\
    tmp -= tmpStride*(h+5-2);\
    for(i=0; i<w; i++)\
    {\
        int j;\
        for(j=0; j<8; j++)\
            OP2(dst[j*dstStride], (tmp[j*tmpStride]+tmp[(j+1)*tmpStride])*20 - (tmp[(j-1)*tmpStride]+tmp[(j+2)*tmpStride])*5 + (tmp[(j-2)*tmpStride]+tmp[(j+3)*tmpStride]));\
        dst++;\
        tmp++;\
    }\
}\
\
static void OPNAME ## h264_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    /* 16x16 = four 8x8 quadrants */\
    OPNAME ## h264_qpel8_v_lowpass(dst  , src  , dstStride, srcStride);\
    OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    OPNAME ## h264_qpel8_v_lowpass(dst  , src  , dstStride, srcStride);\
    OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\
}\
\
static void OPNAME ## h264_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    OPNAME ## h264_qpel8_h_lowpass(dst  , src  , dstStride, srcStride);\
    OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    OPNAME ## h264_qpel8_h_lowpass(dst  , src  , dstStride, srcStride);\
    OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\
}\
\
static void OPNAME ## h264_qpel16_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    OPNAME ## h264_qpel8_hv_lowpass(dst  , tmp  , src  , dstStride, tmpStride, srcStride);\
    OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    OPNAME ## h264_qpel8_hv_lowpass(dst  , tmp  , src  , dstStride, tmpStride, srcStride);\
    OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\
}
/* H.264 quarter-pel MC position dispatch: generates the 16 _mcXY_c
 * functions for a given block SIZE, combining the h/v/hv lowpass
 * filters and pixel averaging per the standard's sample positions. */
#define H264_MC(OPNAME, SIZE) \
static void OPNAME ## h264_qpel ## SIZE ## _mc00_c (uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## pixels ## SIZE ## _c(dst, src, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc10_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, src, half, stride, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc20_c(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## h264_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc30_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, src+1, half, stride, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc01_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t half[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, full_mid, half, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc02_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
    OPNAME ## h264_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc03_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t half[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, full_mid+SIZE, half, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc11_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc31_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2 + 1, SIZE,  stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc13_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc33_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2 + 1, SIZE,  stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc22_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t tmp[SIZE*(SIZE+5)];\
    OPNAME ## h264_qpel ## SIZE ## _hv_lowpass(dst, tmp, src, stride, SIZE, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc21_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc23_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc12_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfV[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc32_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfV[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2 + 1, SIZE,  stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
}
2393 #define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
2394 //#define op_avg2(a, b) a = (((a)*w1+cm[((b) + 16)>>5]*w2 + o + 64)>>7)
2395 #define op_put(a, b) a = cm[((b) + 16)>>5]
2396 #define op2_avg(a, b) a = (((a)+cm[((b) + 512)>>10]+1)>>1)
2397 #define op2_put(a, b) a = cm[((b) + 512)>>10]
2399 H264_LOWPASS(put_
, op_put
, op2_put
)
2400 H264_LOWPASS(avg_
, op_avg
, op2_avg
)
/* H.264 explicit (weight) and bidirectional (biweight) weighted
 * prediction. The per-row x loop replaces the original's unrolled
 * op_scale calls with W==N early-continue; results are identical. */
#define op_scale1(x)  block[x] = clip_uint8( (block[x]*weight + offset) >> log2_denom )
#define op_scale2(x)  dst[x] = clip_uint8( (src[x]*weights + dst[x]*weightd + offset) >> (log2_denom+1))
#define H264_WEIGHT(W,H) \
static void weight_h264_pixels ## W ## x ## H ## _c(uint8_t *block, int stride, int log2_denom, int weight, int offset){ \
    int attribute_unused x, y; \
    offset <<= log2_denom; \
    if(log2_denom) offset += 1<<(log2_denom-1); \
    for(y=0; y<H; y++, block += stride){ \
        for(x=0; x<W; x++) op_scale1(x); \
    } \
} \
static void biweight_h264_pixels ## W ## x ## H ## _c(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offsetd, int offsets){ \
    int attribute_unused x, y; \
    int offset = (offsets + offsetd + 1) >> 1; \
    offset = ((offset << 1) + 1) << log2_denom; \
    for(y=0; y<H; y++, dst += stride, src += stride){ \
        for(x=0; x<W; x++) op_scale2(x); \
    } \
}
2485 static void wmv2_mspel8_h_lowpass(uint8_t *dst
, uint8_t *src
, int dstStride
, int srcStride
, int h
){
2486 uint8_t *cm
= cropTbl
+ MAX_NEG_CROP
;
2490 dst
[0]= cm
[(9*(src
[0] + src
[1]) - (src
[-1] + src
[2]) + 8)>>4];
2491 dst
[1]= cm
[(9*(src
[1] + src
[2]) - (src
[ 0] + src
[3]) + 8)>>4];
2492 dst
[2]= cm
[(9*(src
[2] + src
[3]) - (src
[ 1] + src
[4]) + 8)>>4];
2493 dst
[3]= cm
[(9*(src
[3] + src
[4]) - (src
[ 2] + src
[5]) + 8)>>4];
2494 dst
[4]= cm
[(9*(src
[4] + src
[5]) - (src
[ 3] + src
[6]) + 8)>>4];
2495 dst
[5]= cm
[(9*(src
[5] + src
[6]) - (src
[ 4] + src
[7]) + 8)>>4];
2496 dst
[6]= cm
[(9*(src
[6] + src
[7]) - (src
[ 5] + src
[8]) + 8)>>4];
2497 dst
[7]= cm
[(9*(src
[7] + src
[8]) - (src
[ 6] + src
[9]) + 8)>>4];
2503 static void wmv2_mspel8_v_lowpass(uint8_t *dst
, uint8_t *src
, int dstStride
, int srcStride
, int w
){
2504 uint8_t *cm
= cropTbl
+ MAX_NEG_CROP
;
2508 const int src_1
= src
[ -srcStride
];
2509 const int src0
= src
[0 ];
2510 const int src1
= src
[ srcStride
];
2511 const int src2
= src
[2*srcStride
];
2512 const int src3
= src
[3*srcStride
];
2513 const int src4
= src
[4*srcStride
];
2514 const int src5
= src
[5*srcStride
];
2515 const int src6
= src
[6*srcStride
];
2516 const int src7
= src
[7*srcStride
];
2517 const int src8
= src
[8*srcStride
];
2518 const int src9
= src
[9*srcStride
];
2519 dst
[0*dstStride
]= cm
[(9*(src0
+ src1
) - (src_1
+ src2
) + 8)>>4];
2520 dst
[1*dstStride
]= cm
[(9*(src1
+ src2
) - (src0
+ src3
) + 8)>>4];
2521 dst
[2*dstStride
]= cm
[(9*(src2
+ src3
) - (src1
+ src4
) + 8)>>4];
2522 dst
[3*dstStride
]= cm
[(9*(src3
+ src4
) - (src2
+ src5
) + 8)>>4];
2523 dst
[4*dstStride
]= cm
[(9*(src4
+ src5
) - (src3
+ src6
) + 8)>>4];
2524 dst
[5*dstStride
]= cm
[(9*(src5
+ src6
) - (src4
+ src7
) + 8)>>4];
2525 dst
[6*dstStride
]= cm
[(9*(src6
+ src7
) - (src5
+ src8
) + 8)>>4];
2526 dst
[7*dstStride
]= cm
[(9*(src7
+ src8
) - (src6
+ src9
) + 8)>>4];
2532 static void put_mspel8_mc00_c (uint8_t *dst
, uint8_t *src
, int stride
){
2533 put_pixels8_c(dst
, src
, stride
, 8);
2536 static void put_mspel8_mc10_c(uint8_t *dst
, uint8_t *src
, int stride
){
2538 wmv2_mspel8_h_lowpass(half
, src
, 8, stride
, 8);
2539 put_pixels8_l2(dst
, src
, half
, stride
, stride
, 8, 8);
2542 static void put_mspel8_mc20_c(uint8_t *dst
, uint8_t *src
, int stride
){
2543 wmv2_mspel8_h_lowpass(dst
, src
, stride
, stride
, 8);
2546 static void put_mspel8_mc30_c(uint8_t *dst
, uint8_t *src
, int stride
){
2548 wmv2_mspel8_h_lowpass(half
, src
, 8, stride
, 8);
2549 put_pixels8_l2(dst
, src
+1, half
, stride
, stride
, 8, 8);
2552 static void put_mspel8_mc02_c(uint8_t *dst
, uint8_t *src
, int stride
){
2553 wmv2_mspel8_v_lowpass(dst
, src
, stride
, stride
, 8);
2556 static void put_mspel8_mc12_c(uint8_t *dst
, uint8_t *src
, int stride
){
2560 wmv2_mspel8_h_lowpass(halfH
, src
-stride
, 8, stride
, 11);
2561 wmv2_mspel8_v_lowpass(halfV
, src
, 8, stride
, 8);
2562 wmv2_mspel8_v_lowpass(halfHV
, halfH
+8, 8, 8, 8);
2563 put_pixels8_l2(dst
, halfV
, halfHV
, stride
, 8, 8, 8);
2565 static void put_mspel8_mc32_c(uint8_t *dst
, uint8_t *src
, int stride
){
2569 wmv2_mspel8_h_lowpass(halfH
, src
-stride
, 8, stride
, 11);
2570 wmv2_mspel8_v_lowpass(halfV
, src
+1, 8, stride
, 8);
2571 wmv2_mspel8_v_lowpass(halfHV
, halfH
+8, 8, 8, 8);
2572 put_pixels8_l2(dst
, halfV
, halfHV
, stride
, 8, 8, 8);
2574 static void put_mspel8_mc22_c(uint8_t *dst
, uint8_t *src
, int stride
){
2576 wmv2_mspel8_h_lowpass(halfH
, src
-stride
, 8, stride
, 11);
2577 wmv2_mspel8_v_lowpass(dst
, halfH
+8, stride
, 8, 8);
2580 static void h263_v_loop_filter_c(uint8_t *src
, int stride
, int qscale
){
2582 const int strength
= ff_h263_loop_filter_strength
[qscale
];
2586 int p0
= src
[x
-2*stride
];
2587 int p1
= src
[x
-1*stride
];
2588 int p2
= src
[x
+0*stride
];
2589 int p3
= src
[x
+1*stride
];
2590 int d
= (p0
- p3
+ 4*(p2
- p1
)) / 8;
2592 if (d
<-2*strength
) d1
= 0;
2593 else if(d
<- strength
) d1
=-2*strength
- d
;
2594 else if(d
< strength
) d1
= d
;
2595 else if(d
< 2*strength
) d1
= 2*strength
- d
;
2600 if(p1
&256) p1
= ~(p1
>>31);
2601 if(p2
&256) p2
= ~(p2
>>31);
2603 src
[x
-1*stride
] = p1
;
2604 src
[x
+0*stride
] = p2
;
2608 d2
= clip((p0
-p3
)/4, -ad1
, ad1
);
2610 src
[x
-2*stride
] = p0
- d2
;
2611 src
[x
+ stride
] = p3
+ d2
;
2615 static void h263_h_loop_filter_c(uint8_t *src
, int stride
, int qscale
){
2617 const int strength
= ff_h263_loop_filter_strength
[qscale
];
2621 int p0
= src
[y
*stride
-2];
2622 int p1
= src
[y
*stride
-1];
2623 int p2
= src
[y
*stride
+0];
2624 int p3
= src
[y
*stride
+1];
2625 int d
= (p0
- p3
+ 4*(p2
- p1
)) / 8;
2627 if (d
<-2*strength
) d1
= 0;
2628 else if(d
<- strength
) d1
=-2*strength
- d
;
2629 else if(d
< strength
) d1
= d
;
2630 else if(d
< 2*strength
) d1
= 2*strength
- d
;
2635 if(p1
&256) p1
= ~(p1
>>31);
2636 if(p2
&256) p2
= ~(p2
>>31);
2638 src
[y
*stride
-1] = p1
;
2639 src
[y
*stride
+0] = p2
;
2643 d2
= clip((p0
-p3
)/4, -ad1
, ad1
);
2645 src
[y
*stride
-2] = p0
- d2
;
2646 src
[y
*stride
+1] = p3
+ d2
;
2650 static void h261_loop_filter_c(uint8_t *src
, int stride
){
2655 temp
[x
] = 4*src
[x
];
2656 temp
[x
+ 7*8] = 4*src
[x
+ 7*stride
];
2660 xy
= y
* stride
+ x
;
2662 temp
[yz
] = src
[xy
- stride
] + 2*src
[xy
] + src
[xy
+ stride
];
2667 src
[ y
*stride
] = (temp
[ y
*8] + 2)>>2;
2668 src
[7+y
*stride
] = (temp
[7+y
*8] + 2)>>2;
2670 xy
= y
* stride
+ x
;
2672 src
[xy
] = (temp
[yz
-1] + 2*temp
[yz
] + temp
[yz
+1] + 8)>>4;
2677 static inline void h264_loop_filter_luma_c(uint8_t *pix
, int xstride
, int ystride
, int alpha
, int beta
, int8_t *tc0
)
2680 for( i
= 0; i
< 4; i
++ ) {
2685 for( d
= 0; d
< 4; d
++ ) {
2686 const int p0
= pix
[-1*xstride
];
2687 const int p1
= pix
[-2*xstride
];
2688 const int p2
= pix
[-3*xstride
];
2689 const int q0
= pix
[0];
2690 const int q1
= pix
[1*xstride
];
2691 const int q2
= pix
[2*xstride
];
2693 if( ABS( p0
- q0
) < alpha
&&
2694 ABS( p1
- p0
) < beta
&&
2695 ABS( q1
- q0
) < beta
) {
2700 if( ABS( p2
- p0
) < beta
) {
2701 pix
[-2*xstride
] = p1
+ clip( (( p2
+ ( ( p0
+ q0
+ 1 ) >> 1 ) ) >> 1) - p1
, -tc0
[i
], tc0
[i
] );
2704 if( ABS( q2
- q0
) < beta
) {
2705 pix
[ xstride
] = q1
+ clip( (( q2
+ ( ( p0
+ q0
+ 1 ) >> 1 ) ) >> 1) - q1
, -tc0
[i
], tc0
[i
] );
2709 i_delta
= clip( (((q0
- p0
) << 2) + (p1
- q1
) + 4) >> 3, -tc
, tc
);
2710 pix
[-xstride
] = clip_uint8( p0
+ i_delta
); /* p0' */
2711 pix
[0] = clip_uint8( q0
- i_delta
); /* q0' */
2717 static void h264_v_loop_filter_luma_c(uint8_t *pix
, int stride
, int alpha
, int beta
, int8_t *tc0
)
2719 h264_loop_filter_luma_c(pix
, stride
, 1, alpha
, beta
, tc0
);
2721 static void h264_h_loop_filter_luma_c(uint8_t *pix
, int stride
, int alpha
, int beta
, int8_t *tc0
)
2723 h264_loop_filter_luma_c(pix
, 1, stride
, alpha
, beta
, tc0
);
2726 static inline void h264_loop_filter_chroma_c(uint8_t *pix
, int xstride
, int ystride
, int alpha
, int beta
, int8_t *tc0
)
2729 for( i
= 0; i
< 4; i
++ ) {
2730 const int tc
= tc0
[i
];
2735 for( d
= 0; d
< 2; d
++ ) {
2736 const int p0
= pix
[-1*xstride
];
2737 const int p1
= pix
[-2*xstride
];
2738 const int q0
= pix
[0];
2739 const int q1
= pix
[1*xstride
];
2741 if( ABS( p0
- q0
) < alpha
&&
2742 ABS( p1
- p0
) < beta
&&
2743 ABS( q1
- q0
) < beta
) {
2745 int delta
= clip( (((q0
- p0
) << 2) + (p1
- q1
) + 4) >> 3, -tc
, tc
);
2747 pix
[-xstride
] = clip_uint8( p0
+ delta
); /* p0' */
2748 pix
[0] = clip_uint8( q0
- delta
); /* q0' */
2754 static void h264_v_loop_filter_chroma_c(uint8_t *pix
, int stride
, int alpha
, int beta
, int8_t *tc0
)
2756 h264_loop_filter_chroma_c(pix
, stride
, 1, alpha
, beta
, tc0
);
2758 static void h264_h_loop_filter_chroma_c(uint8_t *pix
, int stride
, int alpha
, int beta
, int8_t *tc0
)
2760 h264_loop_filter_chroma_c(pix
, 1, stride
, alpha
, beta
, tc0
);
2763 static inline void h264_loop_filter_chroma_intra_c(uint8_t *pix
, int xstride
, int ystride
, int alpha
, int beta
)
2766 for( d
= 0; d
< 8; d
++ ) {
2767 const int p0
= pix
[-1*xstride
];
2768 const int p1
= pix
[-2*xstride
];
2769 const int q0
= pix
[0];
2770 const int q1
= pix
[1*xstride
];
2772 if( ABS( p0
- q0
) < alpha
&&
2773 ABS( p1
- p0
) < beta
&&
2774 ABS( q1
- q0
) < beta
) {
2776 pix
[-xstride
] = ( 2*p1
+ p0
+ q1
+ 2 ) >> 2; /* p0' */
2777 pix
[0] = ( 2*q1
+ q0
+ p1
+ 2 ) >> 2; /* q0' */
2782 static void h264_v_loop_filter_chroma_intra_c(uint8_t *pix
, int stride
, int alpha
, int beta
)
2784 h264_loop_filter_chroma_intra_c(pix
, stride
, 1, alpha
, beta
);
2786 static void h264_h_loop_filter_chroma_intra_c(uint8_t *pix
, int stride
, int alpha
, int beta
)
2788 h264_loop_filter_chroma_intra_c(pix
, 1, stride
, alpha
, beta
);
2791 static inline int pix_abs16_c(void *v
, uint8_t *pix1
, uint8_t *pix2
, int line_size
, int h
)
2797 s
+= abs(pix1
[0] - pix2
[0]);
2798 s
+= abs(pix1
[1] - pix2
[1]);
2799 s
+= abs(pix1
[2] - pix2
[2]);
2800 s
+= abs(pix1
[3] - pix2
[3]);
2801 s
+= abs(pix1
[4] - pix2
[4]);
2802 s
+= abs(pix1
[5] - pix2
[5]);
2803 s
+= abs(pix1
[6] - pix2
[6]);
2804 s
+= abs(pix1
[7] - pix2
[7]);
2805 s
+= abs(pix1
[8] - pix2
[8]);
2806 s
+= abs(pix1
[9] - pix2
[9]);
2807 s
+= abs(pix1
[10] - pix2
[10]);
2808 s
+= abs(pix1
[11] - pix2
[11]);
2809 s
+= abs(pix1
[12] - pix2
[12]);
2810 s
+= abs(pix1
[13] - pix2
[13]);
2811 s
+= abs(pix1
[14] - pix2
[14]);
2812 s
+= abs(pix1
[15] - pix2
[15]);
2819 static int pix_abs16_x2_c(void *v
, uint8_t *pix1
, uint8_t *pix2
, int line_size
, int h
)
2825 s
+= abs(pix1
[0] - avg2(pix2
[0], pix2
[1]));
2826 s
+= abs(pix1
[1] - avg2(pix2
[1], pix2
[2]));
2827 s
+= abs(pix1
[2] - avg2(pix2
[2], pix2
[3]));
2828 s
+= abs(pix1
[3] - avg2(pix2
[3], pix2
[4]));
2829 s
+= abs(pix1
[4] - avg2(pix2
[4], pix2
[5]));
2830 s
+= abs(pix1
[5] - avg2(pix2
[5], pix2
[6]));
2831 s
+= abs(pix1
[6] - avg2(pix2
[6], pix2
[7]));
2832 s
+= abs(pix1
[7] - avg2(pix2
[7], pix2
[8]));
2833 s
+= abs(pix1
[8] - avg2(pix2
[8], pix2
[9]));
2834 s
+= abs(pix1
[9] - avg2(pix2
[9], pix2
[10]));
2835 s
+= abs(pix1
[10] - avg2(pix2
[10], pix2
[11]));
2836 s
+= abs(pix1
[11] - avg2(pix2
[11], pix2
[12]));
2837 s
+= abs(pix1
[12] - avg2(pix2
[12], pix2
[13]));
2838 s
+= abs(pix1
[13] - avg2(pix2
[13], pix2
[14]));
2839 s
+= abs(pix1
[14] - avg2(pix2
[14], pix2
[15]));
2840 s
+= abs(pix1
[15] - avg2(pix2
[15], pix2
[16]));
2847 static int pix_abs16_y2_c(void *v
, uint8_t *pix1
, uint8_t *pix2
, int line_size
, int h
)
2850 uint8_t *pix3
= pix2
+ line_size
;
2854 s
+= abs(pix1
[0] - avg2(pix2
[0], pix3
[0]));
2855 s
+= abs(pix1
[1] - avg2(pix2
[1], pix3
[1]));
2856 s
+= abs(pix1
[2] - avg2(pix2
[2], pix3
[2]));
2857 s
+= abs(pix1
[3] - avg2(pix2
[3], pix3
[3]));
2858 s
+= abs(pix1
[4] - avg2(pix2
[4], pix3
[4]));
2859 s
+= abs(pix1
[5] - avg2(pix2
[5], pix3
[5]));
2860 s
+= abs(pix1
[6] - avg2(pix2
[6], pix3
[6]));
2861 s
+= abs(pix1
[7] - avg2(pix2
[7], pix3
[7]));
2862 s
+= abs(pix1
[8] - avg2(pix2
[8], pix3
[8]));
2863 s
+= abs(pix1
[9] - avg2(pix2
[9], pix3
[9]));
2864 s
+= abs(pix1
[10] - avg2(pix2
[10], pix3
[10]));
2865 s
+= abs(pix1
[11] - avg2(pix2
[11], pix3
[11]));
2866 s
+= abs(pix1
[12] - avg2(pix2
[12], pix3
[12]));
2867 s
+= abs(pix1
[13] - avg2(pix2
[13], pix3
[13]));
2868 s
+= abs(pix1
[14] - avg2(pix2
[14], pix3
[14]));
2869 s
+= abs(pix1
[15] - avg2(pix2
[15], pix3
[15]));
2877 static int pix_abs16_xy2_c(void *v
, uint8_t *pix1
, uint8_t *pix2
, int line_size
, int h
)
2880 uint8_t *pix3
= pix2
+ line_size
;
2884 s
+= abs(pix1
[0] - avg4(pix2
[0], pix2
[1], pix3
[0], pix3
[1]));
2885 s
+= abs(pix1
[1] - avg4(pix2
[1], pix2
[2], pix3
[1], pix3
[2]));
2886 s
+= abs(pix1
[2] - avg4(pix2
[2], pix2
[3], pix3
[2], pix3
[3]));
2887 s
+= abs(pix1
[3] - avg4(pix2
[3], pix2
[4], pix3
[3], pix3
[4]));
2888 s
+= abs(pix1
[4] - avg4(pix2
[4], pix2
[5], pix3
[4], pix3
[5]));
2889 s
+= abs(pix1
[5] - avg4(pix2
[5], pix2
[6], pix3
[5], pix3
[6]));
2890 s
+= abs(pix1
[6] - avg4(pix2
[6], pix2
[7], pix3
[6], pix3
[7]));
2891 s
+= abs(pix1
[7] - avg4(pix2
[7], pix2
[8], pix3
[7], pix3
[8]));
2892 s
+= abs(pix1
[8] - avg4(pix2
[8], pix2
[9], pix3
[8], pix3
[9]));
2893 s
+= abs(pix1
[9] - avg4(pix2
[9], pix2
[10], pix3
[9], pix3
[10]));
2894 s
+= abs(pix1
[10] - avg4(pix2
[10], pix2
[11], pix3
[10], pix3
[11]));
2895 s
+= abs(pix1
[11] - avg4(pix2
[11], pix2
[12], pix3
[11], pix3
[12]));
2896 s
+= abs(pix1
[12] - avg4(pix2
[12], pix2
[13], pix3
[12], pix3
[13]));
2897 s
+= abs(pix1
[13] - avg4(pix2
[13], pix2
[14], pix3
[13], pix3
[14]));
2898 s
+= abs(pix1
[14] - avg4(pix2
[14], pix2
[15], pix3
[14], pix3
[15]));
2899 s
+= abs(pix1
[15] - avg4(pix2
[15], pix2
[16], pix3
[15], pix3
[16]));
2907 static inline int pix_abs8_c(void *v
, uint8_t *pix1
, uint8_t *pix2
, int line_size
, int h
)
2913 s
+= abs(pix1
[0] - pix2
[0]);
2914 s
+= abs(pix1
[1] - pix2
[1]);
2915 s
+= abs(pix1
[2] - pix2
[2]);
2916 s
+= abs(pix1
[3] - pix2
[3]);
2917 s
+= abs(pix1
[4] - pix2
[4]);
2918 s
+= abs(pix1
[5] - pix2
[5]);
2919 s
+= abs(pix1
[6] - pix2
[6]);
2920 s
+= abs(pix1
[7] - pix2
[7]);
2927 static int pix_abs8_x2_c(void *v
, uint8_t *pix1
, uint8_t *pix2
, int line_size
, int h
)
2933 s
+= abs(pix1
[0] - avg2(pix2
[0], pix2
[1]));
2934 s
+= abs(pix1
[1] - avg2(pix2
[1], pix2
[2]));
2935 s
+= abs(pix1
[2] - avg2(pix2
[2], pix2
[3]));
2936 s
+= abs(pix1
[3] - avg2(pix2
[3], pix2
[4]));
2937 s
+= abs(pix1
[4] - avg2(pix2
[4], pix2
[5]));
2938 s
+= abs(pix1
[5] - avg2(pix2
[5], pix2
[6]));
2939 s
+= abs(pix1
[6] - avg2(pix2
[6], pix2
[7]));
2940 s
+= abs(pix1
[7] - avg2(pix2
[7], pix2
[8]));
2947 static int pix_abs8_y2_c(void *v
, uint8_t *pix1
, uint8_t *pix2
, int line_size
, int h
)
2950 uint8_t *pix3
= pix2
+ line_size
;
2954 s
+= abs(pix1
[0] - avg2(pix2
[0], pix3
[0]));
2955 s
+= abs(pix1
[1] - avg2(pix2
[1], pix3
[1]));
2956 s
+= abs(pix1
[2] - avg2(pix2
[2], pix3
[2]));
2957 s
+= abs(pix1
[3] - avg2(pix2
[3], pix3
[3]));
2958 s
+= abs(pix1
[4] - avg2(pix2
[4], pix3
[4]));
2959 s
+= abs(pix1
[5] - avg2(pix2
[5], pix3
[5]));
2960 s
+= abs(pix1
[6] - avg2(pix2
[6], pix3
[6]));
2961 s
+= abs(pix1
[7] - avg2(pix2
[7], pix3
[7]));
2969 static int pix_abs8_xy2_c(void *v
, uint8_t *pix1
, uint8_t *pix2
, int line_size
, int h
)
2972 uint8_t *pix3
= pix2
+ line_size
;
2976 s
+= abs(pix1
[0] - avg4(pix2
[0], pix2
[1], pix3
[0], pix3
[1]));
2977 s
+= abs(pix1
[1] - avg4(pix2
[1], pix2
[2], pix3
[1], pix3
[2]));
2978 s
+= abs(pix1
[2] - avg4(pix2
[2], pix2
[3], pix3
[2], pix3
[3]));
2979 s
+= abs(pix1
[3] - avg4(pix2
[3], pix2
[4], pix3
[3], pix3
[4]));
2980 s
+= abs(pix1
[4] - avg4(pix2
[4], pix2
[5], pix3
[4], pix3
[5]));
2981 s
+= abs(pix1
[5] - avg4(pix2
[5], pix2
[6], pix3
[5], pix3
[6]));
2982 s
+= abs(pix1
[6] - avg4(pix2
[6], pix2
[7], pix3
[6], pix3
[7]));
2983 s
+= abs(pix1
[7] - avg4(pix2
[7], pix2
[8], pix3
[7], pix3
[8]));
2991 static int nsse16_c(MpegEncContext
*c
, uint8_t *s1
, uint8_t *s2
, int stride
, int h
){
2997 for(x
=0; x
<16; x
++){
2998 score1
+= (s1
[x
] - s2
[x
])*(s1
[x
] - s2
[x
]);
3001 for(x
=0; x
<15; x
++){
3002 score2
+= ABS( s1
[x
] - s1
[x
+stride
]
3003 - s1
[x
+1] + s1
[x
+1+stride
])
3004 -ABS( s2
[x
] - s2
[x
+stride
]
3005 - s2
[x
+1] + s2
[x
+1+stride
]);
3012 if(c
) return score1
+ ABS(score2
)*c
->avctx
->nsse_weight
;
3013 else return score1
+ ABS(score2
)*8;
3016 static int nsse8_c(MpegEncContext
*c
, uint8_t *s1
, uint8_t *s2
, int stride
, int h
){
3023 score1
+= (s1
[x
] - s2
[x
])*(s1
[x
] - s2
[x
]);
3027 score2
+= ABS( s1
[x
] - s1
[x
+stride
]
3028 - s1
[x
+1] + s1
[x
+1+stride
])
3029 -ABS( s2
[x
] - s2
[x
+stride
]
3030 - s2
[x
+1] + s2
[x
+1+stride
]);
3037 if(c
) return score1
+ ABS(score2
)*c
->avctx
->nsse_weight
;
3038 else return score1
+ ABS(score2
)*8;
3041 static int try_8x8basis_c(int16_t rem
[64], int16_t weight
[64], int16_t basis
[64], int scale
){
3045 for(i
=0; i
<8*8; i
++){
3046 int b
= rem
[i
] + ((basis
[i
]*scale
+ (1<<(BASIS_SHIFT
- RECON_SHIFT
-1)))>>(BASIS_SHIFT
- RECON_SHIFT
));
3049 assert(-512<b
&& b
<512);
3051 sum
+= (w
*b
)*(w
*b
)>>4;
3056 static void add_8x8basis_c(int16_t rem
[64], int16_t basis
[64], int scale
){
3059 for(i
=0; i
<8*8; i
++){
3060 rem
[i
] += (basis
[i
]*scale
+ (1<<(BASIS_SHIFT
- RECON_SHIFT
-1)))>>(BASIS_SHIFT
- RECON_SHIFT
);
3065 * permutes an 8x8 block.
3066 * @param block the block which will be permuted according to the given permutation vector
3067 * @param permutation the permutation vector
3068 * @param last the last non zero coefficient in scantable order, used to speed the permutation up
3069 * @param scantable the used scantable, this is only used to speed the permutation up, the block is not
3070 * (inverse) permutated to scantable order!
3072 void ff_block_permute(DCTELEM
*block
, uint8_t *permutation
, const uint8_t *scantable
, int last
)
3078 //if(permutation[1]==1) return; //FIXME its ok but not clean and might fail for some perms
3080 for(i
=0; i
<=last
; i
++){
3081 const int j
= scantable
[i
];
3086 for(i
=0; i
<=last
; i
++){
3087 const int j
= scantable
[i
];
3088 const int perm_j
= permutation
[j
];
3089 block
[perm_j
]= temp
[j
];
/* Compare function that always reports zero cost (used for FF_CMP_ZERO). */
static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){
    return 0;
}
3097 void ff_set_cmp(DSPContext
* c
, me_cmp_func
*cmp
, int type
){
3100 memset(cmp
, 0, sizeof(void*)*5);
3108 cmp
[i
]= c
->hadamard8_diff
[i
];
3114 cmp
[i
]= c
->dct_sad
[i
];
3117 cmp
[i
]= c
->dct_max
[i
];
3120 cmp
[i
]= c
->quant_psnr
[i
];
3147 av_log(NULL
, AV_LOG_ERROR
,"internal error in cmp function selection\n");
3153 * memset(blocks, 0, sizeof(DCTELEM)*6*64)
3155 static void clear_blocks_c(DCTELEM
*blocks
)
3157 memset(blocks
, 0, sizeof(DCTELEM
)*6*64);
/* dst[i] += src[i] for w bytes, with modular (wrapping) byte arithmetic.
 * Manually unrolled by 8 with a scalar tail loop. */
static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){
    int i;
    for(i=0; i+7<w; i+=8){
        dst[i+0] += src[i+0];
        dst[i+1] += src[i+1];
        dst[i+2] += src[i+2];
        dst[i+3] += src[i+3];
        dst[i+4] += src[i+4];
        dst[i+5] += src[i+5];
        dst[i+6] += src[i+6];
        dst[i+7] += src[i+7];
    }
    for(; i<w; i++)
        dst[i+0] += src[i+0];
}
/* dst[i] = src1[i] - src2[i] for w bytes, with modular byte arithmetic.
 * Manually unrolled by 8 with a scalar tail loop. */
static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
    int i;
    for(i=0; i+7<w; i+=8){
        dst[i+0] = src1[i+0]-src2[i+0];
        dst[i+1] = src1[i+1]-src2[i+1];
        dst[i+2] = src1[i+2]-src2[i+2];
        dst[i+3] = src1[i+3]-src2[i+3];
        dst[i+4] = src1[i+4]-src2[i+4];
        dst[i+5] = src1[i+5]-src2[i+5];
        dst[i+6] = src1[i+6]-src2[i+6];
        dst[i+7] = src1[i+7]-src2[i+7];
    }
    for(; i<w; i++)
        dst[i+0] = src1[i+0]-src2[i+0];
}
/* HuffYUV median prediction: dst[i] = src2[i] - mid_pred(left, top, left+top-topleft),
 * scanning left to right. *left / *left_top carry the running state across
 * calls (updated on return). */
static void sub_hfyu_median_prediction_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w, int *left, int *left_top){
    int i;
    uint8_t l, lt;

    l= *left;
    lt= *left_top;

    for(i=0; i<w; i++){
        const int pred= mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF);
        lt= src1[i];
        l = src2[i];
        dst[i]= l - pred;
    }

    *left    = l;
    *left_top= lt;
}
/* Butterfly helpers for the 8x8 Hadamard transforms below.
 * NOTE(review): continuation lines were lost in extraction; bodies follow
 * the upstream definitions — verify against the repository. */
#define BUTTERFLY2(o1,o2,i1,i2) \
    o1= (i1)+(i2);\
    o2= (i1)-(i2);

#define BUTTERFLY1(x,y) \
    {\
        int a,b;\
        a= x;\
        b= y;\
        x= a+b;\
        y= a-b;\
    }

#define BUTTERFLYA(x,y) (ABS((x)+(y)) + ABS((x)-(y)))
/* SATD: sum of absolute values of the 8x8 Hadamard transform of (src - dst).
 * First pass: horizontal butterflies per row; second pass: vertical
 * butterflies per column, accumulating absolute values. */
static int hadamard8_diff8x8_c(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){
    int i;
    int temp[64];
    int sum=0;

    assert(h==8);

    for(i=0; i<8; i++){
        //FIXME try pointer walks
        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0]-dst[stride*i+0],src[stride*i+1]-dst[stride*i+1]);
        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2]-dst[stride*i+2],src[stride*i+3]-dst[stride*i+3]);
        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4]-dst[stride*i+4],src[stride*i+5]-dst[stride*i+5]);
        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6]-dst[stride*i+6],src[stride*i+7]-dst[stride*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
        BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+5], temp[8*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
        BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
    }

    for(i=0; i<8; i++){
        BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
        BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
        BUTTERFLY1(temp[8*6+i], temp[8*7+i]);

        BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
        BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
        BUTTERFLY1(temp[8*5+i], temp[8*7+i]);

        sum +=
             BUTTERFLYA(temp[8*0+i], temp[8*4+i])
            +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
            +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
            +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
    }

    return sum;
}
/* Intra SATD: sum of absolute 8x8 Hadamard coefficients of src itself,
 * with the DC term (block mean) subtracted at the end. */
static int hadamard8_intra8x8_c(/*MpegEncContext*/ void *s, uint8_t *src, uint8_t *dummy, int stride, int h){
    int i;
    int temp[64];
    int sum=0;

    assert(h==8);

    for(i=0; i<8; i++){
        //FIXME try pointer walks
        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0],src[stride*i+1]);
        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2],src[stride*i+3]);
        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4],src[stride*i+5]);
        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6],src[stride*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
        BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+5], temp[8*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
        BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
    }

    for(i=0; i<8; i++){
        BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
        BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
        BUTTERFLY1(temp[8*6+i], temp[8*7+i]);

        BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
        BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
        BUTTERFLY1(temp[8*5+i], temp[8*7+i]);

        sum +=
             BUTTERFLYA(temp[8*0+i], temp[8*4+i])
            +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
            +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
            +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
    }

    sum -= ABS(temp[8*0] + temp[8*4]); // -mean

    return sum;
}
3325 static int dct_sad8x8_c(/*MpegEncContext*/ void *c
, uint8_t *src1
, uint8_t *src2
, int stride
, int h
){
3326 MpegEncContext
* const s
= (MpegEncContext
*)c
;
3327 uint64_t __align8 aligned_temp
[sizeof(DCTELEM
)*64/8];
3328 DCTELEM
* const temp
= (DCTELEM
*)aligned_temp
;
3333 s
->dsp
.diff_pixels(temp
, src1
, src2
, stride
);
3342 static int dct_max8x8_c(/*MpegEncContext*/ void *c
, uint8_t *src1
, uint8_t *src2
, int stride
, int h
){
3343 MpegEncContext
* const s
= (MpegEncContext
*)c
;
3344 uint64_t __align8 aligned_temp
[sizeof(DCTELEM
)*64/8];
3345 DCTELEM
* const temp
= (DCTELEM
*)aligned_temp
;
3350 s
->dsp
.diff_pixels(temp
, src1
, src2
, stride
);
3354 sum
= FFMAX(sum
, ABS(temp
[i
]));
3359 void simple_idct(DCTELEM
*block
); //FIXME
3361 static int quant_psnr8x8_c(/*MpegEncContext*/ void *c
, uint8_t *src1
, uint8_t *src2
, int stride
, int h
){
3362 MpegEncContext
* const s
= (MpegEncContext
*)c
;
3363 uint64_t __align8 aligned_temp
[sizeof(DCTELEM
)*64*2/8];
3364 DCTELEM
* const temp
= (DCTELEM
*)aligned_temp
;
3365 DCTELEM
* const bak
= ((DCTELEM
*)aligned_temp
)+64;
3371 s
->dsp
.diff_pixels(temp
, src1
, src2
, stride
);
3373 memcpy(bak
, temp
, 64*sizeof(DCTELEM
));
3375 s
->block_last_index
[0/*FIXME*/]= s
->fast_dct_quantize(s
, temp
, 0/*FIXME*/, s
->qscale
, &i
);
3376 s
->dct_unquantize_inter(s
, temp
, 0, s
->qscale
);
3377 simple_idct(temp
); //FIXME
3380 sum
+= (temp
[i
]-bak
[i
])*(temp
[i
]-bak
[i
]);
3385 static int rd8x8_c(/*MpegEncContext*/ void *c
, uint8_t *src1
, uint8_t *src2
, int stride
, int h
){
3386 MpegEncContext
* const s
= (MpegEncContext
*)c
;
3387 const uint8_t *scantable
= s
->intra_scantable
.permutated
;
3388 uint64_t __align8 aligned_temp
[sizeof(DCTELEM
)*64/8];
3389 uint64_t __align8 aligned_bak
[stride
];
3390 DCTELEM
* const temp
= (DCTELEM
*)aligned_temp
;
3391 uint8_t * const bak
= (uint8_t*)aligned_bak
;
3392 int i
, last
, run
, bits
, level
, distoration
, start_i
;
3393 const int esc_length
= s
->ac_esc_length
;
3395 uint8_t * last_length
;
3400 ((uint32_t*)(bak
+ i
*stride
))[0]= ((uint32_t*)(src2
+ i
*stride
))[0];
3401 ((uint32_t*)(bak
+ i
*stride
))[1]= ((uint32_t*)(src2
+ i
*stride
))[1];
3404 s
->dsp
.diff_pixels(temp
, src1
, src2
, stride
);
3406 s
->block_last_index
[0/*FIXME*/]= last
= s
->fast_dct_quantize(s
, temp
, 0/*FIXME*/, s
->qscale
, &i
);
3412 length
= s
->intra_ac_vlc_length
;
3413 last_length
= s
->intra_ac_vlc_last_length
;
3414 bits
+= s
->luma_dc_vlc_length
[temp
[0] + 256]; //FIXME chroma
3417 length
= s
->inter_ac_vlc_length
;
3418 last_length
= s
->inter_ac_vlc_last_length
;
3423 for(i
=start_i
; i
<last
; i
++){
3424 int j
= scantable
[i
];
3429 if((level
&(~127)) == 0){
3430 bits
+= length
[UNI_AC_ENC_INDEX(run
, level
)];
3439 level
= temp
[i
] + 64;
3443 if((level
&(~127)) == 0){
3444 bits
+= last_length
[UNI_AC_ENC_INDEX(run
, level
)];
3452 s
->dct_unquantize_intra(s
, temp
, 0, s
->qscale
);
3454 s
->dct_unquantize_inter(s
, temp
, 0, s
->qscale
);
3457 s
->dsp
.idct_add(bak
, stride
, temp
);
3459 distoration
= s
->dsp
.sse
[1](NULL
, bak
, src1
, stride
, 8);
3461 return distoration
+ ((bits
*s
->qscale
*s
->qscale
*109 + 64)>>7);
3464 static int bit8x8_c(/*MpegEncContext*/ void *c
, uint8_t *src1
, uint8_t *src2
, int stride
, int h
){
3465 MpegEncContext
* const s
= (MpegEncContext
*)c
;
3466 const uint8_t *scantable
= s
->intra_scantable
.permutated
;
3467 uint64_t __align8 aligned_temp
[sizeof(DCTELEM
)*64/8];
3468 DCTELEM
* const temp
= (DCTELEM
*)aligned_temp
;
3469 int i
, last
, run
, bits
, level
, start_i
;
3470 const int esc_length
= s
->ac_esc_length
;
3472 uint8_t * last_length
;
3476 s
->dsp
.diff_pixels(temp
, src1
, src2
, stride
);
3478 s
->block_last_index
[0/*FIXME*/]= last
= s
->fast_dct_quantize(s
, temp
, 0/*FIXME*/, s
->qscale
, &i
);
3484 length
= s
->intra_ac_vlc_length
;
3485 last_length
= s
->intra_ac_vlc_last_length
;
3486 bits
+= s
->luma_dc_vlc_length
[temp
[0] + 256]; //FIXME chroma
3489 length
= s
->inter_ac_vlc_length
;
3490 last_length
= s
->inter_ac_vlc_last_length
;
3495 for(i
=start_i
; i
<last
; i
++){
3496 int j
= scantable
[i
];
3501 if((level
&(~127)) == 0){
3502 bits
+= length
[UNI_AC_ENC_INDEX(run
, level
)];
3511 level
= temp
[i
] + 64;
3515 if((level
&(~127)) == 0){
3516 bits
+= last_length
[UNI_AC_ENC_INDEX(run
, level
)];
/* Vertical SAD within a single 16xh block: sum of absolute differences
 * between vertically adjacent pixels (a measure of vertical activity). */
static int vsad_intra16_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){
    int score=0;
    int x,y;

    for(y=1; y<h; y++){
        for(x=0; x<16; x+=4){
            score+= ABS(s[x  ] - s[x  +stride]) + ABS(s[x+1] - s[x+1+stride])
                   +ABS(s[x+2] - s[x+2+stride]) + ABS(s[x+3] - s[x+3+stride]);
        }
        s+= stride;
    }

    return score;
}
/* Vertical SAD of the 16xh difference block: sum of absolute vertical
 * gradients of (s1 - s2). */
static int vsad16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
    int score=0;
    int x,y;

    for(y=1; y<h; y++){
        for(x=0; x<16; x++){
            score+= ABS(s1[x] - s2[x] - s1[x+stride] + s2[x+stride]);
        }
        s1+= stride;
        s2+= stride;
    }

    return score;
}
#define SQ(a) ((a)*(a))

/* Vertical SSE within a single 16xh block: sum of squared differences
 * between vertically adjacent pixels. */
static int vsse_intra16_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){
    int score=0;
    int x,y;

    for(y=1; y<h; y++){
        for(x=0; x<16; x+=4){
            score+= SQ(s[x  ] - s[x  +stride]) + SQ(s[x+1] - s[x+1+stride])
                   +SQ(s[x+2] - s[x+2+stride]) + SQ(s[x+3] - s[x+3+stride]);
        }
        s+= stride;
    }

    return score;
}
/* Vertical SSE of the 16xh difference block: sum of squared vertical
 * gradients of (s1 - s2). */
static int vsse16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
    int score=0;
    int x,y;

    for(y=1; y<h; y++){
        for(x=0; x<16; x++){
            score+= SQ(s1[x] - s2[x] - s1[x+stride] + s2[x+stride]);
        }
        s1+= stride;
        s2+= stride;
    }

    return score;
}
3585 WARPER8_16_SQ(hadamard8_diff8x8_c
, hadamard8_diff16_c
)
3586 WARPER8_16_SQ(hadamard8_intra8x8_c
, hadamard8_intra16_c
)
3587 WARPER8_16_SQ(dct_sad8x8_c
, dct_sad16_c
)
3588 WARPER8_16_SQ(dct_max8x8_c
, dct_max16_c
)
3589 WARPER8_16_SQ(quant_psnr8x8_c
, quant_psnr16_c
)
3590 WARPER8_16_SQ(rd8x8_c
, rd16_c
)
3591 WARPER8_16_SQ(bit8x8_c
, bit16_c
)
3593 /* XXX: those functions should be suppressed ASAP when all IDCTs are
3595 static void ff_jref_idct_put(uint8_t *dest
, int line_size
, DCTELEM
*block
)
3598 put_pixels_clamped_c(block
, dest
, line_size
);
3600 static void ff_jref_idct_add(uint8_t *dest
, int line_size
, DCTELEM
*block
)
3603 add_pixels_clamped_c(block
, dest
, line_size
);
3606 static void ff_jref_idct4_put(uint8_t *dest
, int line_size
, DCTELEM
*block
)
3609 put_pixels_clamped4_c(block
, dest
, line_size
);
3611 static void ff_jref_idct4_add(uint8_t *dest
, int line_size
, DCTELEM
*block
)
3614 add_pixels_clamped4_c(block
, dest
, line_size
);
3617 static void ff_jref_idct2_put(uint8_t *dest
, int line_size
, DCTELEM
*block
)
3620 put_pixels_clamped2_c(block
, dest
, line_size
);
3622 static void ff_jref_idct2_add(uint8_t *dest
, int line_size
, DCTELEM
*block
)
3625 add_pixels_clamped2_c(block
, dest
, line_size
);
3628 static void ff_jref_idct1_put(uint8_t *dest
, int line_size
, DCTELEM
*block
)
3630 uint8_t *cm
= cropTbl
+ MAX_NEG_CROP
;
3632 dest
[0] = cm
[(block
[0] + 4)>>3];
3634 static void ff_jref_idct1_add(uint8_t *dest
, int line_size
, DCTELEM
*block
)
3636 uint8_t *cm
= cropTbl
+ MAX_NEG_CROP
;
3638 dest
[0] = cm
[dest
[0] + ((block
[0] + 4)>>3)];
3641 /* init static data */
3642 void dsputil_static_init(void)
3646 for(i
=0;i
<256;i
++) cropTbl
[i
+ MAX_NEG_CROP
] = i
;
3647 for(i
=0;i
<MAX_NEG_CROP
;i
++) {
3649 cropTbl
[i
+ MAX_NEG_CROP
+ 256] = 255;
3652 for(i
=0;i
<512;i
++) {
3653 squareTbl
[i
] = (i
- 256) * (i
- 256);
3656 for(i
=0; i
<64; i
++) inv_zigzag_direct16
[ff_zigzag_direct
[i
]]= i
+1;
3660 void dsputil_init(DSPContext
* c
, AVCodecContext
*avctx
)
3664 #ifdef CONFIG_ENCODERS
3665 if(avctx
->dct_algo
==FF_DCT_FASTINT
) {
3666 c
->fdct
= fdct_ifast
;
3667 c
->fdct248
= fdct_ifast248
;
3669 else if(avctx
->dct_algo
==FF_DCT_FAAN
) {
3670 c
->fdct
= ff_faandct
;
3671 c
->fdct248
= ff_faandct248
;
3674 c
->fdct
= ff_jpeg_fdct_islow
; //slow/accurate/default
3675 c
->fdct248
= ff_fdct248_islow
;
3677 #endif //CONFIG_ENCODERS
3679 if(avctx
->lowres
==1){
3680 if(avctx
->idct_algo
==FF_IDCT_INT
|| avctx
->idct_algo
==FF_IDCT_AUTO
){
3681 c
->idct_put
= ff_jref_idct4_put
;
3682 c
->idct_add
= ff_jref_idct4_add
;
3684 c
->idct_put
= ff_h264_lowres_idct_put_c
;
3685 c
->idct_add
= ff_h264_lowres_idct_add_c
;
3687 c
->idct
= j_rev_dct4
;
3688 c
->idct_permutation_type
= FF_NO_IDCT_PERM
;
3689 }else if(avctx
->lowres
==2){
3690 c
->idct_put
= ff_jref_idct2_put
;
3691 c
->idct_add
= ff_jref_idct2_add
;
3692 c
->idct
= j_rev_dct2
;
3693 c
->idct_permutation_type
= FF_NO_IDCT_PERM
;
3694 }else if(avctx
->lowres
==3){
3695 c
->idct_put
= ff_jref_idct1_put
;
3696 c
->idct_add
= ff_jref_idct1_add
;
3697 c
->idct
= j_rev_dct1
;
3698 c
->idct_permutation_type
= FF_NO_IDCT_PERM
;
3700 if(avctx
->idct_algo
==FF_IDCT_INT
){
3701 c
->idct_put
= ff_jref_idct_put
;
3702 c
->idct_add
= ff_jref_idct_add
;
3703 c
->idct
= j_rev_dct
;
3704 c
->idct_permutation_type
= FF_LIBMPEG2_IDCT_PERM
;
3705 }else if(avctx
->idct_algo
==FF_IDCT_VP3
){
3706 c
->idct_put
= ff_vp3_idct_put_c
;
3707 c
->idct_add
= ff_vp3_idct_add_c
;
3708 c
->idct
= ff_vp3_idct_c
;
3709 c
->idct_permutation_type
= FF_NO_IDCT_PERM
;
3710 }else{ //accurate/default
3711 c
->idct_put
= simple_idct_put
;
3712 c
->idct_add
= simple_idct_add
;
3713 c
->idct
= simple_idct
;
3714 c
->idct_permutation_type
= FF_NO_IDCT_PERM
;
3718 c
->h264_idct_add
= ff_h264_idct_add_c
;
3719 c
->h264_idct8_add
= ff_h264_idct8_add_c
;
3721 c
->get_pixels
= get_pixels_c
;
3722 c
->diff_pixels
= diff_pixels_c
;
3723 c
->put_pixels_clamped
= put_pixels_clamped_c
;
3724 c
->put_signed_pixels_clamped
= put_signed_pixels_clamped_c
;
3725 c
->add_pixels_clamped
= add_pixels_clamped_c
;
3726 c
->add_pixels8
= add_pixels8_c
;
3727 c
->add_pixels4
= add_pixels4_c
;
3730 c
->clear_blocks
= clear_blocks_c
;
3731 c
->pix_sum
= pix_sum_c
;
3732 c
->pix_norm1
= pix_norm1_c
;
3734 /* TODO [0] 16 [1] 8 */
3735 c
->pix_abs
[0][0] = pix_abs16_c
;
3736 c
->pix_abs
[0][1] = pix_abs16_x2_c
;
3737 c
->pix_abs
[0][2] = pix_abs16_y2_c
;
3738 c
->pix_abs
[0][3] = pix_abs16_xy2_c
;
3739 c
->pix_abs
[1][0] = pix_abs8_c
;
3740 c
->pix_abs
[1][1] = pix_abs8_x2_c
;
3741 c
->pix_abs
[1][2] = pix_abs8_y2_c
;
3742 c
->pix_abs
[1][3] = pix_abs8_xy2_c
;
3744 #define dspfunc(PFX, IDX, NUM) \
3745 c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## NUM ## _c; \
3746 c->PFX ## _pixels_tab[IDX][1] = PFX ## _pixels ## NUM ## _x2_c; \
3747 c->PFX ## _pixels_tab[IDX][2] = PFX ## _pixels ## NUM ## _y2_c; \
3748 c->PFX ## _pixels_tab[IDX][3] = PFX ## _pixels ## NUM ## _xy2_c
3750 dspfunc(put
, 0, 16);
3751 dspfunc(put_no_rnd
, 0, 16);
3753 dspfunc(put_no_rnd
, 1, 8);
3757 dspfunc(avg
, 0, 16);
3758 dspfunc(avg_no_rnd
, 0, 16);
3760 dspfunc(avg_no_rnd
, 1, 8);
3765 c
->put_no_rnd_pixels_l2
[0]= put_no_rnd_pixels16_l2_c
;
3766 c
->put_no_rnd_pixels_l2
[1]= put_no_rnd_pixels8_l2_c
;
3768 c
->put_tpel_pixels_tab
[ 0] = put_tpel_pixels_mc00_c
;
3769 c
->put_tpel_pixels_tab
[ 1] = put_tpel_pixels_mc10_c
;
3770 c
->put_tpel_pixels_tab
[ 2] = put_tpel_pixels_mc20_c
;
3771 c
->put_tpel_pixels_tab
[ 4] = put_tpel_pixels_mc01_c
;
3772 c
->put_tpel_pixels_tab
[ 5] = put_tpel_pixels_mc11_c
;
3773 c
->put_tpel_pixels_tab
[ 6] = put_tpel_pixels_mc21_c
;
3774 c
->put_tpel_pixels_tab
[ 8] = put_tpel_pixels_mc02_c
;
3775 c
->put_tpel_pixels_tab
[ 9] = put_tpel_pixels_mc12_c
;
3776 c
->put_tpel_pixels_tab
[10] = put_tpel_pixels_mc22_c
;
3778 c
->avg_tpel_pixels_tab
[ 0] = avg_tpel_pixels_mc00_c
;
3779 c
->avg_tpel_pixels_tab
[ 1] = avg_tpel_pixels_mc10_c
;
3780 c
->avg_tpel_pixels_tab
[ 2] = avg_tpel_pixels_mc20_c
;
3781 c
->avg_tpel_pixels_tab
[ 4] = avg_tpel_pixels_mc01_c
;
3782 c
->avg_tpel_pixels_tab
[ 5] = avg_tpel_pixels_mc11_c
;
3783 c
->avg_tpel_pixels_tab
[ 6] = avg_tpel_pixels_mc21_c
;
3784 c
->avg_tpel_pixels_tab
[ 8] = avg_tpel_pixels_mc02_c
;
3785 c
->avg_tpel_pixels_tab
[ 9] = avg_tpel_pixels_mc12_c
;
3786 c
->avg_tpel_pixels_tab
[10] = avg_tpel_pixels_mc22_c
;
3788 #define dspfunc(PFX, IDX, NUM) \
3789 c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \
3790 c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \
3791 c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \
3792 c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \
3793 c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \
3794 c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \
3795 c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \
3796 c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \
3797 c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \
3798 c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \
3799 c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
3800 c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
3801 c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
3802 c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
3803 c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
3804 c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c
3806 dspfunc(put_qpel
, 0, 16);
3807 dspfunc(put_no_rnd_qpel
, 0, 16);
3809 dspfunc(avg_qpel
, 0, 16);
3810 /* dspfunc(avg_no_rnd_qpel, 0, 16); */
3812 dspfunc(put_qpel
, 1, 8);
3813 dspfunc(put_no_rnd_qpel
, 1, 8);
3815 dspfunc(avg_qpel
, 1, 8);
3816 /* dspfunc(avg_no_rnd_qpel, 1, 8); */
3818 dspfunc(put_h264_qpel
, 0, 16);
3819 dspfunc(put_h264_qpel
, 1, 8);
3820 dspfunc(put_h264_qpel
, 2, 4);
3821 dspfunc(avg_h264_qpel
, 0, 16);
3822 dspfunc(avg_h264_qpel
, 1, 8);
3823 dspfunc(avg_h264_qpel
, 2, 4);
3826 c
->put_h264_chroma_pixels_tab
[0]= put_h264_chroma_mc8_c
;
3827 c
->put_h264_chroma_pixels_tab
[1]= put_h264_chroma_mc4_c
;
3828 c
->put_h264_chroma_pixels_tab
[2]= put_h264_chroma_mc2_c
;
3829 c
->avg_h264_chroma_pixels_tab
[0]= avg_h264_chroma_mc8_c
;
3830 c
->avg_h264_chroma_pixels_tab
[1]= avg_h264_chroma_mc4_c
;
3831 c
->avg_h264_chroma_pixels_tab
[2]= avg_h264_chroma_mc2_c
;
3833 c
->weight_h264_pixels_tab
[0]= weight_h264_pixels16x16_c
;
3834 c
->weight_h264_pixels_tab
[1]= weight_h264_pixels16x8_c
;
3835 c
->weight_h264_pixels_tab
[2]= weight_h264_pixels8x16_c
;
3836 c
->weight_h264_pixels_tab
[3]= weight_h264_pixels8x8_c
;
3837 c
->weight_h264_pixels_tab
[4]= weight_h264_pixels8x4_c
;
3838 c
->weight_h264_pixels_tab
[5]= weight_h264_pixels4x8_c
;
3839 c
->weight_h264_pixels_tab
[6]= weight_h264_pixels4x4_c
;
3840 c
->weight_h264_pixels_tab
[7]= weight_h264_pixels4x2_c
;
3841 c
->weight_h264_pixels_tab
[8]= weight_h264_pixels2x4_c
;
3842 c
->weight_h264_pixels_tab
[9]= weight_h264_pixels2x2_c
;
3843 c
->biweight_h264_pixels_tab
[0]= biweight_h264_pixels16x16_c
;
3844 c
->biweight_h264_pixels_tab
[1]= biweight_h264_pixels16x8_c
;
3845 c
->biweight_h264_pixels_tab
[2]= biweight_h264_pixels8x16_c
;
3846 c
->biweight_h264_pixels_tab
[3]= biweight_h264_pixels8x8_c
;
3847 c
->biweight_h264_pixels_tab
[4]= biweight_h264_pixels8x4_c
;
3848 c
->biweight_h264_pixels_tab
[5]= biweight_h264_pixels4x8_c
;
3849 c
->biweight_h264_pixels_tab
[6]= biweight_h264_pixels4x4_c
;
3850 c
->biweight_h264_pixels_tab
[7]= biweight_h264_pixels4x2_c
;
3851 c
->biweight_h264_pixels_tab
[8]= biweight_h264_pixels2x4_c
;
3852 c
->biweight_h264_pixels_tab
[9]= biweight_h264_pixels2x2_c
;
3854 c
->put_mspel_pixels_tab
[0]= put_mspel8_mc00_c
;
3855 c
->put_mspel_pixels_tab
[1]= put_mspel8_mc10_c
;
3856 c
->put_mspel_pixels_tab
[2]= put_mspel8_mc20_c
;
3857 c
->put_mspel_pixels_tab
[3]= put_mspel8_mc30_c
;
3858 c
->put_mspel_pixels_tab
[4]= put_mspel8_mc02_c
;
3859 c
->put_mspel_pixels_tab
[5]= put_mspel8_mc12_c
;
3860 c
->put_mspel_pixels_tab
[6]= put_mspel8_mc22_c
;
3861 c
->put_mspel_pixels_tab
[7]= put_mspel8_mc32_c
;
3863 #define SET_CMP_FUNC(name) \
3864 c->name[0]= name ## 16_c;\
3865 c->name[1]= name ## 8x8_c;
3867 SET_CMP_FUNC(hadamard8_diff
)
3868 c
->hadamard8_diff
[4]= hadamard8_intra16_c
;
3869 SET_CMP_FUNC(dct_sad
)
3870 SET_CMP_FUNC(dct_max
)
3871 c
->sad
[0]= pix_abs16_c
;
3872 c
->sad
[1]= pix_abs8_c
;
3876 SET_CMP_FUNC(quant_psnr
)
3879 c
->vsad
[0]= vsad16_c
;
3880 c
->vsad
[4]= vsad_intra16_c
;
3881 c
->vsse
[0]= vsse16_c
;
3882 c
->vsse
[4]= vsse_intra16_c
;
3883 c
->nsse
[0]= nsse16_c
;
3884 c
->nsse
[1]= nsse8_c
;
3885 c
->w53
[0]= w53_16_c
;
3887 c
->w97
[0]= w97_16_c
;
3890 c
->add_bytes
= add_bytes_c
;
3891 c
->diff_bytes
= diff_bytes_c
;
3892 c
->sub_hfyu_median_prediction
= sub_hfyu_median_prediction_c
;
3893 c
->bswap_buf
= bswap_buf
;
3895 c
->h264_v_loop_filter_luma
= h264_v_loop_filter_luma_c
;
3896 c
->h264_h_loop_filter_luma
= h264_h_loop_filter_luma_c
;
3897 c
->h264_v_loop_filter_chroma
= h264_v_loop_filter_chroma_c
;
3898 c
->h264_h_loop_filter_chroma
= h264_h_loop_filter_chroma_c
;
3899 c
->h264_v_loop_filter_chroma_intra
= h264_v_loop_filter_chroma_intra_c
;
3900 c
->h264_h_loop_filter_chroma_intra
= h264_h_loop_filter_chroma_intra_c
;
3902 c
->h263_h_loop_filter
= h263_h_loop_filter_c
;
3903 c
->h263_v_loop_filter
= h263_v_loop_filter_c
;
3905 c
->h261_loop_filter
= h261_loop_filter_c
;
3907 c
->try_8x8basis
= try_8x8basis_c
;
3908 c
->add_8x8basis
= add_8x8basis_c
;
3911 dsputil_init_mmx(c
, avctx
);
3914 dsputil_init_armv4l(c
, avctx
);
3917 dsputil_init_mlib(c
, avctx
);
3920 dsputil_init_vis(c
,avctx
);
3923 dsputil_init_alpha(c
, avctx
);
3926 dsputil_init_ppc(c
, avctx
);
3929 dsputil_init_mmi(c
, avctx
);
3932 dsputil_init_sh4(c
,avctx
);
3935 switch(c
->idct_permutation_type
){
3936 case FF_NO_IDCT_PERM
:
3938 c
->idct_permutation
[i
]= i
;
3940 case FF_LIBMPEG2_IDCT_PERM
:
3942 c
->idct_permutation
[i
]= (i
& 0x38) | ((i
& 6) >> 1) | ((i
& 1) << 2);
3944 case FF_SIMPLE_IDCT_PERM
:
3946 c
->idct_permutation
[i
]= simple_mmx_permutation
[i
];
3948 case FF_TRANSPOSE_IDCT_PERM
:
3950 c
->idct_permutation
[i
]= ((i
&7)<<3) | (i
>>3);
3952 case FF_PARTTRANS_IDCT_PERM
:
3954 c
->idct_permutation
[i
]= (i
&0x24) | ((i
&3)<<3) | ((i
>>3)&3);
3957 av_log(avctx
, AV_LOG_ERROR
, "Internal error, IDCT permutation not set\n");