/*
 * Copyright (c) 2000, 2001 Fabrice Bellard.
 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
 *
 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
#include "simple_idct.h"
void ff_spatial_dwt(int *buffer, int width, int height, int stride, int type, int decomposition_count);

void vorbis_inverse_coupling(float *mag, float *ang, int blocksize);

void ff_flac_compute_autocorr(const int32_t *data, int len, int lag, double *autoc);

void ff_add_png_paeth_prediction(uint8_t *dst, uint8_t *src, uint8_t *top, int w, int bpp);
uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
uint32_t ff_squareTbl[512] = {0, };
// 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f, depending on whether unsigned long is 32 or 64 bits wide
#define pb_7f (~0UL/255 * 0x7f)
#define pb_80 (~0UL/255 * 0x80)
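/* How the replication trick above works (illustrative worked example, not
 * part of the original code): ~0UL/255 yields a word with 0x01 in every
 * byte, so multiplying by a byte value replicates it across the word.
 * With a 32-bit unsigned long:
 *     ~0UL/255          == 0x01010101
 *     0x01010101 * 0x7f == 0x7f7f7f7f   (pb_7f)
 *     0x01010101 * 0x80 == 0x80808080   (pb_80)
 * With a 64-bit unsigned long the same expressions give the 8-byte patterns. */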
const uint8_t ff_zigzag_direct[64] = {
    0,   1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
};
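/* Reading the table (example added for clarity, not in the original source):
 * the scan visits raster positions in zigzag order, so ff_zigzag_direct[2]
 * == 8 means the third coefficient read from the bitstream lands at raster
 * index 8, i.e. row 1, column 0 of the 8x8 block. */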
/* Specific zigzag scan for 248 idct. NOTE that unlike the
   specification, we interleave the fields */
const uint8_t ff_zigzag248_direct[64] = {
     0,  8,  1,  9, 16, 24,  2, 10,
    17, 25, 32, 40, 48, 56, 33, 41,
    18, 26,  3, 11,  4, 12, 19, 27,
    34, 42, 49, 57, 50, 58, 35, 43,
    20, 28,  5, 13,  6, 14, 21, 29,
    36, 44, 51, 59, 52, 60, 37, 45,
    22, 30,  7, 15, 23, 31, 38, 46,
    53, 61, 54, 62, 39, 47, 55, 63,
};
/* not permutated inverse zigzag_direct + 1 for MMX quantizer */
DECLARE_ALIGNED_8(uint16_t, inv_zigzag_direct16[64]) = {0, };
const uint8_t ff_alternate_horizontal_scan[64] = {
    0,   1,  2,  3,  8,  9, 16, 17,
    10, 11,  4,  5,  6,  7, 15, 14,
    13, 12, 19, 18, 24, 25, 32, 33,
    26, 27, 20, 21, 22, 23, 28, 29,
    30, 31, 34, 35, 40, 41, 48, 49,
    42, 43, 36, 37, 38, 39, 44, 45,
    46, 47, 50, 51, 56, 57, 58, 59,
    52, 53, 54, 55, 60, 61, 62, 63,
};
const uint8_t ff_alternate_vertical_scan[64] = {
    0,   8, 16, 24,  1,  9,  2, 10,
    17, 25, 32, 40, 48, 56, 57, 49,
    41, 33, 26, 18,  3, 11,  4, 12,
    19, 27, 34, 42, 50, 58, 35, 43,
    51, 59, 20, 28,  5, 13,  6, 14,
    21, 29, 36, 44, 52, 60, 37, 45,
    53, 61, 22, 30,  7, 15, 23, 31,
    38, 46, 54, 62, 39, 47, 55, 63,
};
/* a*inverse[b]>>32 == a/b for all 0<=a<=65536 && 2<=b<=255 */
const uint32_t ff_inverse[256]={
            0, 4294967295U,2147483648U,1431655766, 1073741824,  858993460,  715827883,  613566757,
    536870912,  477218589,  429496730,  390451573,  357913942,  330382100,  306783379,  286331154,
    268435456,  252645136,  238609295,  226050911,  214748365,  204522253,  195225787,  186737709,
    178956971,  171798692,  165191050,  159072863,  153391690,  148102321,  143165577,  138547333,
    134217728,  130150525,  126322568,  122713352,  119304648,  116080198,  113025456,  110127367,
    107374183,  104755300,  102261127,   99882961,   97612894,   95443718,   93368855,   91382283,
     89478486,   87652394,   85899346,   84215046,   82595525,   81037119,   79536432,   78090315,
     76695845,   75350304,   74051161,   72796056,   71582789,   70409300,   69273667,   68174085,
     67108864,   66076420,   65075263,   64103990,   63161284,   62245903,   61356676,   60492498,
     59652324,   58835169,   58040099,   57266231,   56512728,   55778797,   55063684,   54366675,
     53687092,   53024288,   52377650,   51746594,   51130564,   50529028,   49941481,   49367441,
     48806447,   48258060,   47721859,   47197443,   46684428,   46182445,   45691142,   45210183,
     44739243,   44278014,   43826197,   43383509,   42949673,   42524429,   42107523,   41698712,
     41297763,   40904451,   40518560,   40139882,   39768216,   39403370,   39045158,   38693400,
     38347923,   38008561,   37675152,   37347542,   37025581,   36709123,   36398028,   36092163,
     35791395,   35495598,   35204650,   34918434,   34636834,   34359739,   34087043,   33818641,
     33554432,   33294321,   33038210,   32786010,   32537632,   32292988,   32051995,   31814573,
     31580642,   31350127,   31122952,   30899046,   30678338,   30460761,   30246249,   30034737,
     29826162,   29620465,   29417585,   29217465,   29020050,   28825284,   28633116,   28443493,
     28256364,   28071682,   27889399,   27709467,   27531842,   27356480,   27183338,   27012373,
     26843546,   26676816,   26512144,   26349493,   26188825,   26030105,   25873297,   25718368,
     25565282,   25414008,   25264514,   25116768,   24970741,   24826401,   24683721,   24542671,
     24403224,   24265352,   24129030,   23994231,   23860930,   23729102,   23598722,   23469767,
     23342214,   23216040,   23091223,   22967740,   22845571,   22724695,   22605092,   22486740,
     22369622,   22253717,   22139007,   22025474,   21913099,   21801865,   21691755,   21582751,
     21474837,   21367997,   21262215,   21157475,   21053762,   20951060,   20849356,   20748635,
     20648882,   20550083,   20452226,   20355296,   20259280,   20164166,   20069941,   19976593,
     19884108,   19792477,   19701685,   19611723,   19522579,   19434242,   19346700,   19259944,
     19173962,   19088744,   19004281,   18920561,   18837576,   18755316,   18673771,   18592933,
     18512791,   18433337,   18354562,   18276457,   18199014,   18122225,   18046082,   17970575,
     17895698,   17821442,   17747799,   17674763,   17602325,   17530479,   17459217,   17388532,
     17318417,   17248865,   17179870,   17111424,   17043522,   16976156,   16909321,   16843010,
};
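/* Worked example of the identity above (added for clarity, not in the
 * original source): ff_inverse[3] == 1431655766 == ceil(2^32 / 3), so for
 * a == 65536:
 *     (65536 * 1431655766) >> 32 == 21845 == 65536 / 3.
 * Callers can thus replace an integer division by a multiply and a shift. */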
/* Input permutation for the simple_idct_mmx */
static const uint8_t simple_mmx_permutation[64]={
    0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
    0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
    0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
    0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
    0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
    0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
    0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
    0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};
static const uint8_t idct_sse2_row_perm[8] = {0, 4, 1, 5, 2, 6, 3, 7};
void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
    int i;
    int end;

    st->scantable= src_scantable;

    for(i=0; i<64; i++){
        int j;
        j = src_scantable[i];
        st->permutated[i] = permutation[j];
    }

    end=-1;
    for(i=0; i<64; i++){
        int j;
        j = st->permutated[i];
        if(j>end) end=j;
        st->raster_end[i]= end;
    }
}
static int pix_sum_c(uint8_t * pix, int line_size)
{
    int s, i, j;

    s = 0;
    for (i = 0; i < 16; i++) {
        for (j = 0; j < 16; j += 8) {
            s += pix[0];
            s += pix[1];
            s += pix[2];
            s += pix[3];
            s += pix[4];
            s += pix[5];
            s += pix[6];
            s += pix[7];
            pix += 8;
        }
        pix += line_size - 16;
    }
    return s;
}
static int pix_norm1_c(uint8_t * pix, int line_size)
{
    int s, i, j;
    uint32_t *sq = ff_squareTbl + 256;

    s = 0;
    for (i = 0; i < 16; i++) {
        for (j = 0; j < 16; j += 8) {
#if LONG_MAX > 2147483647
            register uint64_t x=*(uint64_t*)pix;
            s += sq[ x      &0xff];
            s += sq[(x>> 8) &0xff];
            s += sq[(x>>16) &0xff];
            s += sq[(x>>24) &0xff];
            s += sq[(x>>32) &0xff];
            s += sq[(x>>40) &0xff];
            s += sq[(x>>48) &0xff];
            s += sq[(x>>56) &0xff];
#else
            register uint32_t x=*(uint32_t*)pix;
            s += sq[ x      &0xff];
            s += sq[(x>> 8) &0xff];
            s += sq[(x>>16) &0xff];
            s += sq[(x>>24) &0xff];
            x=*(uint32_t*)(pix+4);
            s += sq[ x      &0xff];
            s += sq[(x>> 8) &0xff];
            s += sq[(x>>16) &0xff];
            s += sq[(x>>24) &0xff];
#endif
            pix += 8;
        }
        pix += line_size - 16;
    }
    return s;
}
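/* Note on the centered square table (explanatory comment, not in the
 * original): ff_squareTbl is filled at runtime (not shown here) so that
 * ff_squareTbl[256+d] == d*d for -256 <= d < 256. With sq pointing at the
 * middle, sq[d] == d*d also for negative d, e.g. sq[-3] == 9, which lets
 * the sse*_c functions below square pix1[k]-pix2[k] without abs() or a
 * branch. */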
static void bswap_buf(uint32_t *dst, const uint32_t *src, int w){
    int i;

    for(i=0; i+8<=w; i+=8){
        dst[i+0]= bswap_32(src[i+0]);
        dst[i+1]= bswap_32(src[i+1]);
        dst[i+2]= bswap_32(src[i+2]);
        dst[i+3]= bswap_32(src[i+3]);
        dst[i+4]= bswap_32(src[i+4]);
        dst[i+5]= bswap_32(src[i+5]);
        dst[i+6]= bswap_32(src[i+6]);
        dst[i+7]= bswap_32(src[i+7]);
    }
    for(;i<w; i++){
        dst[i+0]= bswap_32(src[i+0]);
    }
}
static int sse4_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
{
    int s, i;
    uint32_t *sq = ff_squareTbl + 256;

    s = 0;
    for (i = 0; i < h; i++) {
        s += sq[pix1[0] - pix2[0]];
        s += sq[pix1[1] - pix2[1]];
        s += sq[pix1[2] - pix2[2]];
        s += sq[pix1[3] - pix2[3]];

        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}
static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
{
    int s, i;
    uint32_t *sq = ff_squareTbl + 256;

    s = 0;
    for (i = 0; i < h; i++) {
        s += sq[pix1[0] - pix2[0]];
        s += sq[pix1[1] - pix2[1]];
        s += sq[pix1[2] - pix2[2]];
        s += sq[pix1[3] - pix2[3]];
        s += sq[pix1[4] - pix2[4]];
        s += sq[pix1[5] - pix2[5]];
        s += sq[pix1[6] - pix2[6]];
        s += sq[pix1[7] - pix2[7]];

        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}
static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s, i;
    uint32_t *sq = ff_squareTbl + 256;

    s = 0;
    for (i = 0; i < h; i++) {
        s += sq[pix1[ 0] - pix2[ 0]];
        s += sq[pix1[ 1] - pix2[ 1]];
        s += sq[pix1[ 2] - pix2[ 2]];
        s += sq[pix1[ 3] - pix2[ 3]];
        s += sq[pix1[ 4] - pix2[ 4]];
        s += sq[pix1[ 5] - pix2[ 5]];
        s += sq[pix1[ 6] - pix2[ 6]];
        s += sq[pix1[ 7] - pix2[ 7]];
        s += sq[pix1[ 8] - pix2[ 8]];
        s += sq[pix1[ 9] - pix2[ 9]];
        s += sq[pix1[10] - pix2[10]];
        s += sq[pix1[11] - pix2[11]];
        s += sq[pix1[12] - pix2[12]];
        s += sq[pix1[13] - pix2[13]];
        s += sq[pix1[14] - pix2[14]];
        s += sq[pix1[15] - pix2[15]];

        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}
#ifdef CONFIG_SNOW_ENCODER //dwt is in snow.c
static inline int w_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int w, int h, int type){
    int s, i, j;
    const int dec_count= w==8 ? 3 : 4;
    int tmp[32*32];
    int level, ori;
    static const int scale[2][2][4][4]={
      {
        {
            // 9/7 8x8 dec=3
            {268, 239, 239, 213},
            {  0, 224, 224, 152},
            {  0, 135, 135, 110},
        },{
            // 9/7 16x16 or 32x32 dec=4
            {344, 310, 310, 280},
            {  0, 320, 320, 228},
            {  0, 175, 175, 136},
            {  0, 129, 129, 102},
        }
      },{
        {
            // 5/3 8x8 dec=3
            {275, 245, 245, 218},
            {  0, 230, 230, 156},
            {  0, 138, 138, 113},
        },{
            // 5/3 16x16 or 32x32 dec=4
            {352, 317, 317, 286},
            {  0, 328, 328, 233},
            {  0, 180, 180, 140},
            {  0, 132, 132, 105},
        }
      }
    };

    for (i = 0; i < h; i++) {
        for (j = 0; j < w; j+=4) {
            tmp[32*i+j+0] = (pix1[j+0] - pix2[j+0])<<4;
            tmp[32*i+j+1] = (pix1[j+1] - pix2[j+1])<<4;
            tmp[32*i+j+2] = (pix1[j+2] - pix2[j+2])<<4;
            tmp[32*i+j+3] = (pix1[j+3] - pix2[j+3])<<4;
        }
        pix1 += line_size;
        pix2 += line_size;
    }

    ff_spatial_dwt(tmp, w, h, 32, type, dec_count);

    s=0;
    assert(w==h);
    for(level=0; level<dec_count; level++){
        for(ori= level ? 1 : 0; ori<4; ori++){
            int size= w>>(dec_count-level);
            int sx= (ori&1) ? size : 0;
            int stride= 32<<(dec_count-level);
            int sy= (ori&2) ? stride>>1 : 0;

            for(i=0; i<size; i++){
                for(j=0; j<size; j++){
                    int v= tmp[sx + sy + i*stride + j] * scale[type][dec_count-3][level][ori];
                    s += FFABS(v);
                }
            }
        }
    }
    assert(s>=0);
    return s>>9;
}
static int w53_8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    return w_c(v, pix1, pix2, line_size,  8, h, 1);
}

static int w97_8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    return w_c(v, pix1, pix2, line_size,  8, h, 0);
}

static int w53_16_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    return w_c(v, pix1, pix2, line_size, 16, h, 1);
}

static int w97_16_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    return w_c(v, pix1, pix2, line_size, 16, h, 0);
}

int w53_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    return w_c(v, pix1, pix2, line_size, 32, h, 1);
}

int w97_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    return w_c(v, pix1, pix2, line_size, 32, h, 0);
}
#endif //CONFIG_SNOW_ENCODER
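/* Context for the w53/w97 wrappers above (comment added for clarity, not in
 * the original source): w_c() runs a 5/3 (type 1) or 9/7 (type 0) spatial
 * DWT over the pix1-pix2 difference and returns a weighted sum of absolute
 * wavelet coefficients, so these functions serve as perceptually weighted
 * block-comparison metrics, used the same way as SAD/SSE during motion
 * estimation and mode decision. */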
/* draw the edges of width 'w' of an image of size width, height */
//FIXME check that this is ok for mpeg4 interlaced
static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w)
{
    uint8_t *ptr, *last_line;
    int i;

    last_line = buf + (height - 1) * wrap;
    for(i=0;i<w;i++) {
        /* top and bottom */
        memcpy(buf - (i + 1) * wrap, buf, width);
        memcpy(last_line + (i + 1) * wrap, last_line, width);
    }
    /* left and right */
    ptr = buf;
    for(i=0;i<height;i++) {
        memset(ptr - w, ptr[0], w);
        memset(ptr + width, ptr[width-1], w);
        ptr += wrap;
    }
    /* corners */
    for(i=0;i<w;i++) {
        memset(buf - (i + 1) * wrap - w, buf[0], w); /* top left */
        memset(buf - (i + 1) * wrap + width, buf[width-1], w); /* top right */
        memset(last_line + (i + 1) * wrap - w, last_line[0], w); /* bottom left */
        memset(last_line + (i + 1) * wrap + width, last_line[width-1], w); /* bottom right */
    }
}
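/* Usage note (added for clarity; the call sites are elsewhere in FFmpeg):
 * buf must point at the top-left pixel of the visible image inside a frame
 * buffer allocated with at least w extra rows above and below and w extra
 * columns left and right, since this function writes into that surrounding
 * margin so motion vectors may safely point off the picture edge. */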
/**
 * Copies a rectangular area of samples to a temporary buffer and replicates the border samples.
 * @param buf destination buffer
 * @param src source buffer
 * @param linesize number of bytes between 2 vertically adjacent samples in both the source and destination buffers
 * @param block_w width of block
 * @param block_h height of block
 * @param src_x x coordinate of the top left sample of the block in the source buffer
 * @param src_y y coordinate of the top left sample of the block in the source buffer
 * @param w width of the source buffer
 * @param h height of the source buffer
 */
void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize, int block_w, int block_h,
                         int src_x, int src_y, int w, int h){
    int x, y;
    int start_y, start_x, end_y, end_x;

    if(src_y>= h){
        src += (h-1-src_y)*linesize;
        src_y=h-1;
    }else if(src_y<=-block_h){
        src += (1-block_h-src_y)*linesize;
        src_y=1-block_h;
    }
    if(src_x>= w){
        src += (w-1-src_x);
        src_x=w-1;
    }else if(src_x<=-block_w){
        src += (1-block_w-src_x);
        src_x=1-block_w;
    }

    start_y= FFMAX(0, -src_y);
    start_x= FFMAX(0, -src_x);
    end_y= FFMIN(block_h, h-src_y);
    end_x= FFMIN(block_w, w-src_x);

    // copy existing part
    for(y=start_y; y<end_y; y++){
        for(x=start_x; x<end_x; x++){
            buf[x + y*linesize]= src[x + y*linesize];
        }
    }

    //top
    for(y=0; y<start_y; y++){
        for(x=start_x; x<end_x; x++){
            buf[x + y*linesize]= buf[x + start_y*linesize];
        }
    }

    //bottom
    for(y=end_y; y<block_h; y++){
        for(x=start_x; x<end_x; x++){
            buf[x + y*linesize]= buf[x + (end_y-1)*linesize];
        }
    }

    for(y=0; y<block_h; y++){
        //left
        for(x=0; x<start_x; x++){
            buf[x + y*linesize]= buf[start_x + y*linesize];
        }

        //right
        for(x=end_x; x<block_w; x++){
            buf[x + y*linesize]= buf[end_x - 1 + y*linesize];
        }
    }
}
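/* Hypothetical call for illustration only (names are examples, not FFmpeg
 * API): to fetch a 17x17 luma area for a motion vector that reaches past
 * the right edge of a w x h frame, a decoder would do something like
 *     ff_emulated_edge_mc(edge_buf, src_ptr, linesize, 17, 17,
 *                         src_x, src_y, w, h);
 * and then run its normal MC code on edge_buf instead of the frame. */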
static void get_pixels_c(DCTELEM *restrict block, const uint8_t *pixels, int line_size)
{
    int i;

    /* read the pixels */
    for(i=0;i<8;i++) {
        block[0] = pixels[0];
        block[1] = pixels[1];
        block[2] = pixels[2];
        block[3] = pixels[3];
        block[4] = pixels[4];
        block[5] = pixels[5];
        block[6] = pixels[6];
        block[7] = pixels[7];
        pixels += line_size;
        block += 8;
    }
}
static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1,
                          const uint8_t *s2, int stride){
    int i;

    /* read the pixels */
    for(i=0;i<8;i++) {
        block[0] = s1[0] - s2[0];
        block[1] = s1[1] - s2[1];
        block[2] = s1[2] - s2[2];
        block[3] = s1[3] - s2[3];
        block[4] = s1[4] - s2[4];
        block[5] = s1[5] - s2[5];
        block[6] = s1[6] - s2[6];
        block[7] = s1[7] - s2[7];
        s1 += stride;
        s2 += stride;
        block += 8;
    }
}
static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
                                 int line_size)
{
    int i;
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;

    /* read the pixels */
    for(i=0;i<8;i++) {
        pixels[0] = cm[block[0]];
        pixels[1] = cm[block[1]];
        pixels[2] = cm[block[2]];
        pixels[3] = cm[block[3]];
        pixels[4] = cm[block[4]];
        pixels[5] = cm[block[5]];
        pixels[6] = cm[block[6]];
        pixels[7] = cm[block[7]];

        pixels += line_size;
        block += 8;
    }
}
static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
                                  int line_size)
{
    int i;
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;

    /* read the pixels */
    for(i=0;i<4;i++) {
        pixels[0] = cm[block[0]];
        pixels[1] = cm[block[1]];
        pixels[2] = cm[block[2]];
        pixels[3] = cm[block[3]];

        pixels += line_size;
        block += 8;
    }
}
static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
                                  int line_size)
{
    int i;
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;

    /* read the pixels */
    for(i=0;i<2;i++) {
        pixels[0] = cm[block[0]];
        pixels[1] = cm[block[1]];

        pixels += line_size;
        block += 8;
    }
}
static void put_signed_pixels_clamped_c(const DCTELEM *block,
                                        uint8_t *restrict pixels,
                                        int line_size)
{
    int i, j;

    for (i = 0; i < 8; i++) {
        for (j = 0; j < 8; j++) {
            if (*block < -128)
                *pixels = 0;
            else if (*block > 127)
                *pixels = 255;
            else
                *pixels = (uint8_t)(*block + 128);
            block++;
            pixels++;
        }
        pixels += (line_size - 8);
    }
}
static void add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
                                 int line_size)
{
    int i;
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;

    /* read the pixels */
    for(i=0;i<8;i++) {
        pixels[0] = cm[pixels[0] + block[0]];
        pixels[1] = cm[pixels[1] + block[1]];
        pixels[2] = cm[pixels[2] + block[2]];
        pixels[3] = cm[pixels[3] + block[3]];
        pixels[4] = cm[pixels[4] + block[4]];
        pixels[5] = cm[pixels[5] + block[5]];
        pixels[6] = cm[pixels[6] + block[6]];
        pixels[7] = cm[pixels[7] + block[7]];
        pixels += line_size;
        block += 8;
    }
}
static void add_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
                                  int line_size)
{
    int i;
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;

    /* read the pixels */
    for(i=0;i<4;i++) {
        pixels[0] = cm[pixels[0] + block[0]];
        pixels[1] = cm[pixels[1] + block[1]];
        pixels[2] = cm[pixels[2] + block[2]];
        pixels[3] = cm[pixels[3] + block[3]];
        pixels += line_size;
        block += 8;
    }
}
static void add_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
                                  int line_size)
{
    int i;
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;

    /* read the pixels */
    for(i=0;i<2;i++) {
        pixels[0] = cm[pixels[0] + block[0]];
        pixels[1] = cm[pixels[1] + block[1]];
        pixels += line_size;
        block += 8;
    }
}
static void add_pixels8_c(uint8_t *restrict pixels, DCTELEM *block, int line_size)
{
    int i;

    for(i=0;i<8;i++) {
        pixels[0] += block[0];
        pixels[1] += block[1];
        pixels[2] += block[2];
        pixels[3] += block[3];
        pixels[4] += block[4];
        pixels[5] += block[5];
        pixels[6] += block[6];
        pixels[7] += block[7];
        pixels += line_size;
        block += 8;
    }
}
static void add_pixels4_c(uint8_t *restrict pixels, DCTELEM *block, int line_size)
{
    int i;

    for(i=0;i<4;i++) {
        pixels[0] += block[0];
        pixels[1] += block[1];
        pixels[2] += block[2];
        pixels[3] += block[3];
        pixels += line_size;
        block += 4;
    }
}
static int sum_abs_dctelem_c(DCTELEM *block)
{
    int sum=0, i;
    for(i=0; i<64; i++)
        sum+= FFABS(block[i]);
    return sum;
}
#if 0

#define PIXOP2(OPNAME, OP) \
static void OPNAME ## _pixels(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        OP(*((uint64_t*)block), AV_RN64(pixels));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _no_rnd_pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        const uint64_t a= AV_RN64(pixels  );\
        const uint64_t b= AV_RN64(pixels+1);\
        OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        const uint64_t a= AV_RN64(pixels  );\
        const uint64_t b= AV_RN64(pixels+1);\
        OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _no_rnd_pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        const uint64_t a= AV_RN64(pixels          );\
        const uint64_t b= AV_RN64(pixels+line_size);\
        OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        const uint64_t a= AV_RN64(pixels          );\
        const uint64_t b= AV_RN64(pixels+line_size);\
        OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
        int i;\
        const uint64_t a= AV_RN64(pixels  );\
        const uint64_t b= AV_RN64(pixels+1);\
        uint64_t l0=  (a&0x0303030303030303ULL)\
                    + (b&0x0303030303030303ULL)\
                    + 0x0202020202020202ULL;\
        uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
                   + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
        uint64_t l1,h1;\
\
        pixels+=line_size;\
        for(i=0; i<h; i+=2){\
            uint64_t a= AV_RN64(pixels  );\
            uint64_t b= AV_RN64(pixels+1);\
            l1=  (a&0x0303030303030303ULL)\
               + (b&0x0303030303030303ULL);\
            h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
              + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
            OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
            pixels+=line_size;\
            block +=line_size;\
            a= AV_RN64(pixels  );\
            b= AV_RN64(pixels+1);\
            l0=  (a&0x0303030303030303ULL)\
               + (b&0x0303030303030303ULL)\
               + 0x0202020202020202ULL;\
            h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
              + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
            OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
            pixels+=line_size;\
            block +=line_size;\
        }\
}\
\
static void OPNAME ## _no_rnd_pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
        int i;\
        const uint64_t a= AV_RN64(pixels  );\
        const uint64_t b= AV_RN64(pixels+1);\
        uint64_t l0=  (a&0x0303030303030303ULL)\
                    + (b&0x0303030303030303ULL)\
                    + 0x0101010101010101ULL;\
        uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
                   + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
        uint64_t l1,h1;\
\
        pixels+=line_size;\
        for(i=0; i<h; i+=2){\
            uint64_t a= AV_RN64(pixels  );\
            uint64_t b= AV_RN64(pixels+1);\
            l1=  (a&0x0303030303030303ULL)\
               + (b&0x0303030303030303ULL);\
            h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
              + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
            OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
            pixels+=line_size;\
            block +=line_size;\
            a= AV_RN64(pixels  );\
            b= AV_RN64(pixels+1);\
            l0=  (a&0x0303030303030303ULL)\
               + (b&0x0303030303030303ULL)\
               + 0x0101010101010101ULL;\
            h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
              + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
            OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
            pixels+=line_size;\
            block +=line_size;\
        }\
}\
\
CALL_2X_PIXELS(OPNAME ## _pixels16_c    , OPNAME ## _pixels_c    , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels_x2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels_y2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels_xy2_c, 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels_x2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels_y2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels_xy2_c, 8)
#define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEFEFEFEFEULL)>>1) )
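/* Per-byte view of op_avg above (explanatory example, not original code):
 * for two bytes a and b, (a|b) - (((a^b)&0xFE)>>1) == (a+b+1)>>1, an
 * average that rounds up, while (a&b) + (((a^b)&0xFE)>>1) == (a+b)>>1
 * rounds down. Masking with 0xFE discards the carry bit of each byte lane
 * so it cannot spill into the neighbouring byte, which is what makes the
 * 8-bytes-at-a-time averaging correct. Example: a=1, b=2 gives
 * (1|2) - (((1^2)&0xFE)>>1) == 3 - 1 == 2 == (1+2+1)>>1. */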
#else // 64 bit variant
#define PIXOP2(OPNAME, OP) \
static void OPNAME ## _pixels2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    int i;\
    for(i=0; i<h; i++){\
        OP(*((uint16_t*)(block  )), AV_RN16(pixels  ));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
static void OPNAME ## _pixels4_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    int i;\
    for(i=0; i<h; i++){\
        OP(*((uint32_t*)(block  )), AV_RN32(pixels  ));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
static void OPNAME ## _pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    int i;\
    for(i=0; i<h; i++){\
        OP(*((uint32_t*)(block  )), AV_RN32(pixels  ));\
        OP(*((uint32_t*)(block+4)), AV_RN32(pixels+4));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
static inline void OPNAME ## _no_rnd_pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels8_c(block, pixels, line_size, h);\
}\
\
static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                int src_stride1, int src_stride2, int h){\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a,b;\
        a= AV_RN32(&src1[i*src_stride1  ]);\
        b= AV_RN32(&src2[i*src_stride2  ]);\
        OP(*((uint32_t*)&dst[i*dst_stride  ]), no_rnd_avg32(a, b));\
        a= AV_RN32(&src1[i*src_stride1+4]);\
        b= AV_RN32(&src2[i*src_stride2+4]);\
        OP(*((uint32_t*)&dst[i*dst_stride+4]), no_rnd_avg32(a, b));\
    }\
}\
\
static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                         int src_stride1, int src_stride2, int h){\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a,b;\
        a= AV_RN32(&src1[i*src_stride1  ]);\
        b= AV_RN32(&src2[i*src_stride2  ]);\
        OP(*((uint32_t*)&dst[i*dst_stride  ]), rnd_avg32(a, b));\
        a= AV_RN32(&src1[i*src_stride1+4]);\
        b= AV_RN32(&src2[i*src_stride2+4]);\
        OP(*((uint32_t*)&dst[i*dst_stride+4]), rnd_avg32(a, b));\
    }\
}\
\
static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                         int src_stride1, int src_stride2, int h){\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a,b;\
        a= AV_RN32(&src1[i*src_stride1  ]);\
        b= AV_RN32(&src2[i*src_stride2  ]);\
        OP(*((uint32_t*)&dst[i*dst_stride  ]), rnd_avg32(a, b));\
    }\
}\
\
static inline void OPNAME ## _pixels2_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                         int src_stride1, int src_stride2, int h){\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a,b;\
        a= AV_RN16(&src1[i*src_stride1  ]);\
        b= AV_RN16(&src2[i*src_stride2  ]);\
        OP(*((uint16_t*)&dst[i*dst_stride  ]), rnd_avg32(a, b));\
    }\
}\
\
static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                          int src_stride1, int src_stride2, int h){\
    OPNAME ## _pixels8_l2(dst  , src1  , src2  , dst_stride, src_stride1, src_stride2, h);\
    OPNAME ## _pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
}\
\
static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                 int src_stride1, int src_stride2, int h){\
    OPNAME ## _no_rnd_pixels8_l2(dst  , src1  , src2  , dst_stride, src_stride1, src_stride2, h);\
    OPNAME ## _no_rnd_pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
}\
\
static inline void OPNAME ## _no_rnd_pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _no_rnd_pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a, b, c, d, l0, l1, h0, h1;\
        a= AV_RN32(&src1[i*src_stride1]);\
        b= AV_RN32(&src2[i*src_stride2]);\
        c= AV_RN32(&src3[i*src_stride3]);\
        d= AV_RN32(&src4[i*src_stride4]);\
        l0=  (a&0x03030303UL)\
           + (b&0x03030303UL)\
           + 0x02020202UL;\
        h0= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        l1=  (c&0x03030303UL)\
           + (d&0x03030303UL);\
        h1= ((c&0xFCFCFCFCUL)>>2)\
          + ((d&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
        a= AV_RN32(&src1[i*src_stride1+4]);\
        b= AV_RN32(&src2[i*src_stride2+4]);\
        c= AV_RN32(&src3[i*src_stride3+4]);\
        d= AV_RN32(&src4[i*src_stride4+4]);\
        l0=  (a&0x03030303UL)\
           + (b&0x03030303UL)\
           + 0x02020202UL;\
        h0= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        l1=  (c&0x03030303UL)\
           + (d&0x03030303UL);\
        h1= ((c&0xFCFCFCFCUL)>>2)\
          + ((d&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
    }\
}\
\
static inline void OPNAME ## _pixels4_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels4_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _pixels4_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels4_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _pixels2_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels2_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _pixels2_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels2_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _no_rnd_pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a, b, c, d, l0, l1, h0, h1;\
        a= AV_RN32(&src1[i*src_stride1]);\
        b= AV_RN32(&src2[i*src_stride2]);\
        c= AV_RN32(&src3[i*src_stride3]);\
        d= AV_RN32(&src4[i*src_stride4]);\
        l0=  (a&0x03030303UL)\
           + (b&0x03030303UL)\
           + 0x01010101UL;\
        h0= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        l1=  (c&0x03030303UL)\
           + (d&0x03030303UL);\
        h1= ((c&0xFCFCFCFCUL)>>2)\
          + ((d&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
        a= AV_RN32(&src1[i*src_stride1+4]);\
        b= AV_RN32(&src2[i*src_stride2+4]);\
        c= AV_RN32(&src3[i*src_stride3+4]);\
        d= AV_RN32(&src4[i*src_stride4+4]);\
        l0=  (a&0x03030303UL)\
           + (b&0x03030303UL)\
           + 0x01010101UL;\
        h0= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        l1=  (c&0x03030303UL)\
           + (d&0x03030303UL);\
        h1= ((c&0xFCFCFCFCUL)>>2)\
          + ((d&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
    }\
}\
\
static inline void OPNAME ## _pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    OPNAME ## _pixels8_l4(dst  , src1  , src2  , src3  , src4  , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
    OPNAME ## _pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
}\
static inline void OPNAME ## _no_rnd_pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    OPNAME ## _no_rnd_pixels8_l4(dst  , src1  , src2  , src3  , src4  , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
    OPNAME ## _no_rnd_pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
}\
\
static inline void OPNAME ## _pixels2_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
        int i, a0, b0, a1, b1;\
        a0= pixels[0];\
        b0= pixels[1] + 2;\
        a0 += b0;\
        b0 += pixels[2];\
\
        pixels+=line_size;\
        for(i=0; i<h; i+=2){\
            a1= pixels[0];\
            b1= pixels[1];\
            a1 += b1;\
            b1 += pixels[2];\
\
            block[0]= (a1+a0)>>2; /* FIXME non put */\
            block[1]= (b1+b0)>>2;\
\
            pixels+=line_size;\
            block +=line_size;\
\
            a0= pixels[0];\
            b0= pixels[1] + 2;\
            a0 += b0;\
            b0 += pixels[2];\
\
            block[0]= (a1+a0)>>2;\
            block[1]= (b1+b0)>>2;\
            pixels+=line_size;\
            block +=line_size;\
        }\
}\
\
static inline void OPNAME ## _pixels4_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
        int i;\
        const uint32_t a= AV_RN32(pixels  );\
        const uint32_t b= AV_RN32(pixels+1);\
        uint32_t l0=  (a&0x03030303UL)\
                    + (b&0x03030303UL)\
                    + 0x02020202UL;\
        uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
                   + ((b&0xFCFCFCFCUL)>>2);\
        uint32_t l1,h1;\
\
        pixels+=line_size;\
        for(i=0; i<h; i+=2){\
            uint32_t a= AV_RN32(pixels  );\
            uint32_t b= AV_RN32(pixels+1);\
            l1=  (a&0x03030303UL)\
               + (b&0x03030303UL);\
            h1= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
            a= AV_RN32(pixels  );\
            b= AV_RN32(pixels+1);\
            l0=  (a&0x03030303UL)\
               + (b&0x03030303UL)\
               + 0x02020202UL;\
            h0= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
        }\
}\
\
static inline void OPNAME ## _pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int j;\
    for(j=0; j<2; j++){\
        int i;\
        const uint32_t a= AV_RN32(pixels  );\
        const uint32_t b= AV_RN32(pixels+1);\
        uint32_t l0=  (a&0x03030303UL)\
                    + (b&0x03030303UL)\
                    + 0x02020202UL;\
        uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
                   + ((b&0xFCFCFCFCUL)>>2);\
        uint32_t l1,h1;\
\
        pixels+=line_size;\
        for(i=0; i<h; i+=2){\
            uint32_t a= AV_RN32(pixels  );\
            uint32_t b= AV_RN32(pixels+1);\
            l1=  (a&0x03030303UL)\
               + (b&0x03030303UL);\
            h1= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
            a= AV_RN32(pixels  );\
            b= AV_RN32(pixels+1);\
            l0=  (a&0x03030303UL)\
               + (b&0x03030303UL)\
               + 0x02020202UL;\
            h0= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
        }\
        pixels+=4-line_size*(h+1);\
        block +=4-line_size*h;\
    }\
}\
\
static inline void OPNAME ## _no_rnd_pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int j;\
    for(j=0; j<2; j++){\
        int i;\
        const uint32_t a= AV_RN32(pixels  );\
        const uint32_t b= AV_RN32(pixels+1);\
        uint32_t l0=  (a&0x03030303UL)\
                    + (b&0x03030303UL)\
                    + 0x01010101UL;\
        uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
                   + ((b&0xFCFCFCFCUL)>>2);\
        uint32_t l1,h1;\
\
        pixels+=line_size;\
        for(i=0; i<h; i+=2){\
            uint32_t a= AV_RN32(pixels  );\
            uint32_t b= AV_RN32(pixels+1);\
            l1=  (a&0x03030303UL)\
               + (b&0x03030303UL);\
            h1= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
            a= AV_RN32(pixels  );\
            b= AV_RN32(pixels+1);\
            l0=  (a&0x03030303UL)\
               + (b&0x03030303UL)\
               + 0x01010101UL;\
            h0= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
        }\
        pixels+=4-line_size*(h+1);\
        block +=4-line_size*h;\
    }\
}\
\
CALL_2X_PIXELS(OPNAME ## _pixels16_c    , OPNAME ## _pixels8_c    , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels8_x2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels8_y2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels8_xy2_c, 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_c     , OPNAME ## _pixels8_c         , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels8_x2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels8_y2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels8_xy2_c, 8)\
#define op_avg(a, b) a = rnd_avg32(a, b)
#endif
#define op_put(a, b) a = b

PIXOP2(avg, op_avg)
PIXOP2(put, op_put)
#undef op_avg
#undef op_put
#define avg2(a,b) ((a+b+1)>>1)
#define avg4(a,b,c,d) ((a+b+c+d+2)>>2)
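/* Rounding behaviour of the helpers above (worked example added for
 * clarity, not in the original): avg2 rounds to nearest with ties up, and
 * avg4 adds half of the divisor (2) before the shift for the same reason,
 * e.g. avg2(1,2) == (1+2+1)>>1 == 2 and avg4(1,1,1,2) == (5+2)>>2 == 1. */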
static void put_no_rnd_pixels16_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){
    put_no_rnd_pixels16_l2(dst, a, b, stride, stride, stride, h);
}

static void put_no_rnd_pixels8_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){
    put_no_rnd_pixels8_l2(dst, a, b, stride, stride, stride, h);
}
static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
{
    const int A=(16-x16)*(16-y16);
    const int B=(   x16)*(16-y16);
    const int C=(16-x16)*(   y16);
    const int D=(   x16)*(   y16);
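    /* Weight sanity check (comment added for clarity): A+B+C+D ==
     * (16-x16)*(16-y16) + x16*(16-y16) + (16-x16)*y16 + x16*y16 == 256,
     * so the >>8 below renormalizes the bilinear interpolation exactly;
     * e.g. x16==y16==8 gives A==B==C==D==64, a plain four-tap average. */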
    int i;

    for(i=0; i<h; i++)
    {
        dst[0]= (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + rounder)>>8;
        dst[1]= (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + rounder)>>8;
        dst[2]= (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + rounder)>>8;
        dst[3]= (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + rounder)>>8;
        dst[4]= (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5] + rounder)>>8;
        dst[5]= (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6] + rounder)>>8;
        dst[6]= (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7] + rounder)>>8;
        dst[7]= (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] + rounder)>>8;
        dst+= stride;
        src+= stride;
    }
}
void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
              int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
{
    int y, vx, vy;
    const int s= 1<<shift;

    width--;
    height--;

    for(y=0; y<h; y++){
        int x;

        vx= ox;
        vy= oy;
        for(x=0; x<8; x++){ //XXX FIXME optimize
            int src_x, src_y, frac_x, frac_y, index;

            src_x= vx>>16;
            src_y= vy>>16;
            frac_x= src_x&(s-1);
            frac_y= src_y&(s-1);
            src_x>>=shift;
            src_y>>=shift;

            if((unsigned)src_x < width){
                if((unsigned)src_y < height){
                    index= src_x + src_y*stride;
                    dst[y*stride + x]= ( (  src[index         ]*(s-frac_x)
                                          + src[index       +1]*   frac_x )*(s-frac_y)
                                       + (  src[index+stride  ]*(s-frac_x)
                                          + src[index+stride+1]*   frac_x )*   frac_y
                                       + r)>>(shift*2);
                }else{
                    index= src_x + av_clip(src_y, 0, height)*stride;
                    dst[y*stride + x]= ( (  src[index         ]*(s-frac_x)
                                          + src[index       +1]*   frac_x )*s
                                       + r)>>(shift*2);
                }
            }else{
                if((unsigned)src_y < height){
                    index= av_clip(src_x, 0, width) + src_y*stride;
                    dst[y*stride + x]= ( (  src[index         ]*(s-frac_y)
                                          + src[index+stride  ]*   frac_y )*s
                                       + r)>>(shift*2);
                }else{
                    index= av_clip(src_x, 0, width) + av_clip(src_y, 0, height)*stride;
                    dst[y*stride + x]= src[index];
                }
            }

            vx+= dxx;
            vy+= dyx;
        }
        ox += dxy;
        oy += dyy;
    }
}
static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    switch(width){
    case 2: put_pixels2_c (dst, src, stride, height); break;
    case 4: put_pixels4_c (dst, src, stride, height); break;
    case 8: put_pixels8_c (dst, src, stride, height); break;
    case 16:put_pixels16_c(dst, src, stride, height); break;
    }
}
static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (683*(2*src[j] + src[j+1] + 1)) >> 11;
      }
      src += stride;
      dst += stride;
    }
}
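/* About the magic constants in these third-pel filters (explanatory note,
 * not in the original source): 683 == round(2^11/3) and 2731 == round(2^15/12),
 * matching taps that sum to 3 (with >>11) and to 12 (with >>15) respectively.
 * So (683*(2*a + b + 1)) >> 11 approximates (2*a + b)/3, the sample one
 * third of the way from a to b, using only a multiply and a shift. */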
static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (683*(src[j] + 2*src[j+1] + 1)) >> 11;
      }
      src += stride;
      dst += stride;
    }
}

static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (683*(2*src[j] + src[j+stride] + 1)) >> 11;
      }
      src += stride;
      dst += stride;
    }
}

static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15;
      }
      src += stride;
      dst += stride;
    }
}

static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
      }
      src += stride;
      dst += stride;
    }
}

static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (683*(src[j] + 2*src[j+stride] + 1)) >> 11;
      }
      src += stride;
      dst += stride;
    }
}

static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
      }
      src += stride;
      dst += stride;
    }
}

static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15;
      }
      src += stride;
      dst += stride;
    }
}
static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    switch(width){
    case 2: avg_pixels2_c (dst, src, stride, height); break;
    case 4: avg_pixels4_c (dst, src, stride, height); break;
    case 8: avg_pixels8_c (dst, src, stride, height); break;
    case 16:avg_pixels16_c(dst, src, stride, height); break;
    }
}

static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (dst[j] + ((683*(2*src[j] + src[j+1] + 1)) >> 11) + 1) >> 1;
      }
      src += stride;
      dst += stride;
    }
}

static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+1] + 1)) >> 11) + 1) >> 1;
      }
      src += stride;
      dst += stride;
    }
}

static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (dst[j] + ((683*(2*src[j] + src[j+stride] + 1)) >> 11) + 1) >> 1;
      }
      src += stride;
      dst += stride;
    }
}

static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (dst[j] + ((2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
      }
      src += stride;
      dst += stride;
    }
}

static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (dst[j] + ((2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
      }
      src += stride;
      dst += stride;
    }
}

static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+stride] + 1)) >> 11) + 1) >> 1;
      }
      src += stride;
      dst += stride;
    }
}

static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (dst[j] + ((2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
      }
      src += stride;
      dst += stride;
    }
}

static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (dst[j] + ((2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
      }
      src += stride;
      dst += stride;
    }
}
#define TPEL_WIDTH(width)\
static void put_tpel_pixels ## width ## _mc00_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc00_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc10_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc10_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc20_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc20_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc01_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc01_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc11_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc11_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc21_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc21_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc02_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc02_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc12_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc12_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc22_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    put_tpel_pixels_mc22_c(dst, src, stride, width, height);}
#define H264_CHROMA_MC(OPNAME, OP)\
static void OPNAME ## h264_chroma_mc2_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=(  x)*(8-y);\
    const int C=(8-x)*(  y);\
    const int D=(  x)*(  y);\
    int i;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
\
    if(D){\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
            dst+= stride;\
            src+= stride;\
        }\
    }else{\
        const int E= B+C;\
        const int step= C ? stride : 1;\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + E*src[step+0]));\
            OP(dst[1], (A*src[1] + E*src[step+1]));\
            dst+= stride;\
            src+= stride;\
        }\
    }\
}\
\
static void OPNAME ## h264_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=(  x)*(8-y);\
    const int C=(8-x)*(  y);\
    const int D=(  x)*(  y);\
    int i;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
\
    if(D){\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
            OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
            OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
            dst+= stride;\
            src+= stride;\
        }\
    }else{\
        const int E= B+C;\
        const int step= C ? stride : 1;\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + E*src[step+0]));\
            OP(dst[1], (A*src[1] + E*src[step+1]));\
            OP(dst[2], (A*src[2] + E*src[step+2]));\
            OP(dst[3], (A*src[3] + E*src[step+3]));\
            dst+= stride;\
            src+= stride;\
        }\
    }\
}\
\
static void OPNAME ## h264_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=(  x)*(8-y);\
    const int C=(8-x)*(  y);\
    const int D=(  x)*(  y);\
    int i;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
\
    if(D){\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
            OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
            OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
            OP(dst[4], (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5]));\
            OP(dst[5], (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6]));\
            OP(dst[6], (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7]));\
            OP(dst[7], (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8]));\
            dst+= stride;\
            src+= stride;\
        }\
    }else{\
        const int E= B+C;\
        const int step= C ? stride : 1;\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + E*src[step+0]));\
            OP(dst[1], (A*src[1] + E*src[step+1]));\
            OP(dst[2], (A*src[2] + E*src[step+2]));\
            OP(dst[3], (A*src[3] + E*src[step+3]));\
            OP(dst[4], (A*src[4] + E*src[step+4]));\
            OP(dst[5], (A*src[5] + E*src[step+5]));\
            OP(dst[6], (A*src[6] + E*src[step+6]));\
            OP(dst[7], (A*src[7] + E*src[step+7]));\
            dst+= stride;\
            src+= stride;\
        }\
    }\
}
#define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1)
#define op_put(a, b) a = (((b) + 32)>>6)
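/* Rounding in the chroma ops above (explanatory example, not original
 * code): the bilinear weights satisfy A+B+C+D == 64, so op_put adds 32
 * (half of 64) before the >>6 to round to nearest, e.g. a weighted sum of
 * 95 stores (95+32)>>6 == 1; op_avg additionally averages with the
 * existing destination pixel, again with a +1 rounding term. */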
H264_CHROMA_MC(put_       , op_put)
H264_CHROMA_MC(avg_       , op_avg)
#undef op_avg
#undef op_put
static void put_no_rnd_h264_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){
    const int A=(8-x)*(8-y);
    const int B=(  x)*(8-y);
    const int C=(8-x)*(  y);
    const int D=(  x)*(  y);
    int i;

    assert(x<8 && y<8 && x>=0 && y>=0);

    for(i=0; i<h; i++)
    {
        dst[0] = (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + 32 - 4) >> 6;
        dst[1] = (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + 32 - 4) >> 6;
        dst[2] = (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + 32 - 4) >> 6;
        dst[3] = (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + 32 - 4) >> 6;
        dst[4] = (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5] + 32 - 4) >> 6;
        dst[5] = (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6] + 32 - 4) >> 6;
        dst[6] = (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7] + 32 - 4) >> 6;
        dst[7] = (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] + 32 - 4) >> 6;
        dst+= stride;
        src+= stride;
    }
}
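/* Why "32 - 4" above (explanatory note, not in the original source): this
 * variant deliberately biases the rounding constant below the exact
 * half-step of 32, giving the downward-biased "no rounding" chroma
 * interpolation some codecs specify. With x==y==0 it still reproduces the
 * source exactly, since then A==64 and (64*p + 28)>>6 == p. */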
#define QPEL_MC(r, OPNAME, RND, OP) \
static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<h; i++)\
    {\
        OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\
        OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\
        OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\
        OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\
        OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\
        OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\
        OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\
        OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
\
static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int w=8;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<w; i++)\
    {\
        const int src0= src[0*srcStride];\
        const int src1= src[1*srcStride];\
        const int src2= src[2*srcStride];\
        const int src3= src[3*srcStride];\
        const int src4= src[4*srcStride];\
        const int src5= src[5*srcStride];\
        const int src6= src[6*srcStride];\
        const int src7= src[7*srcStride];\
        const int src8= src[8*srcStride];\
        OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
        OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
        OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
        OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
        OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
        OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
        OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
        dst++;\
        src++;\
    }\
}\
\
static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    \
    for(i=0; i<h; i++)\
    {\
        OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\
        OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\
        OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\
        OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\
        OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\
        OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\
        OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\
        OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\
        OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\
        OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\
        OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\
        OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\
        OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\
        OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\
        OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\
        OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
\
static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    const int w=16;\
    for(i=0; i<w; i++)\
    {\
        const int src0= src[0*srcStride];\
        const int src1= src[1*srcStride];\
        const int src2= src[2*srcStride];\
        const int src3= src[3*srcStride];\
        const int src4= src[4*srcStride];\
        const int src5= src[5*srcStride];\
        const int src6= src[6*srcStride];\
        const int src7= src[7*srcStride];\
        const int src8= src[8*srcStride];\
        const int src9= src[9*srcStride];\
        const int src10= src[10*srcStride];\
        const int src11= src[11*srcStride];\
        const int src12= src[12*srcStride];\
        const int src13= src[13*srcStride];\
        const int src14= src[14*srcStride];\
        const int src15= src[15*srcStride];\
        const int src16= src[16*srcStride];\
        OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
        OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
        OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
        OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
        OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
        OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
        OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
        OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
        OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
        OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
        OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
        OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
        OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
        OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
        OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
        OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
        dst++;\
        src++;\
    }\
}\
\
static void OPNAME ## qpel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## pixels8_c(dst, src, stride, 8);\
}\
\
static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[64];\
    put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
    OPNAME ## pixels8_l2(dst, src, half, stride, stride, 8, 8);\
}\
\
static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
}\
\
static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[64];\
    put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
    OPNAME ## pixels8_l2(dst, src+1, half, stride, stride, 8, 8);\
}\
\
static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t half[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
    OPNAME ## pixels8_l2(dst, full, half, stride, 16, 8, 8);\
}\
\
static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    copy_block9(full, src, 16, stride, 9);\
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
}\
\
static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t half[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
    OPNAME ## pixels8_l2(dst, full+16, half, stride, 16, 8, 8);\
}\
void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfV[64];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l4(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
}\
void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfV[64];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l4(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
}\
void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfV[64];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l4(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
}\
void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfV[64];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full  , 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l4(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t halfH[72];\
    uint8_t halfHV[64];\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t halfH[72];\
    uint8_t halfHV[64];\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
}\
void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfV[64];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
}\
void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
1976 uint8_t full[16*9];\
1979 uint8_t halfHV[64];\
1980 copy_block9(full, src, 16, stride, 9);\
1981 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1982 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1983 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1984 OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
1986 static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
1987 uint8_t full[16*9];\
1989 copy_block9(full, src, 16, stride, 9);\
1990 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1991 put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
1992 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1994 static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
1996 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
1997 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
static void OPNAME ## qpel16_mc00_c (uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## pixels16_c(dst, src, stride, 16);\
}\
\
static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[256];\
    put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
    OPNAME ## pixels16_l2(dst, src, half, stride, stride, 16, 16);\
}\
\
static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
}\
\
static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[256];\
    put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
    OPNAME ## pixels16_l2(dst, src+1, half, stride, stride, 16, 16);\
}\
\
static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t half[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
    OPNAME ## pixels16_l2(dst, full, half, stride, 24, 16, 16);\
}\
\
static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    copy_block17(full, src, 24, stride, 17);\
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
}\
\
static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t half[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
    OPNAME ## pixels16_l2(dst, full+24, half, stride, 24, 16, 16);\
}\
void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfV[256];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l4(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
}\
void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfV[256];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l4(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
}\
void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfV[256];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l4(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
}\
void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfV[256];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full  , 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l4(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t halfH[272];\
    uint8_t halfHV[256];\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t halfH[272];\
    uint8_t halfHV[256];\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
}\
void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfV[256];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
}\
void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfV[256];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
}\
static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t halfH[272];\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
}
#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
#define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
#define op_put(a, b) a = cm[((b) + 16)>>5]
#define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]

QPEL_MC(0, put_       , _       , op_put)
QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
QPEL_MC(0, avg_       , _       , op_avg)
//QPEL_MC(1, avg_no_rnd , _       , op_avg)

#undef op_avg
#undef op_avg_no_rnd
#undef op_put
#undef op_put_no_rnd
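
/* Illustrative arithmetic for the two rounding modes above: each tap row of
 * the qpel lowpass filters has coefficient sum 20+20-6-6+3+3-1-1 = 32, so a
 * flat area of value 100 yields b = 3200 and op_put stores
 * cm[(3200+16)>>5] = 100, i.e. the filters are DC preserving. The no_rnd
 * variants bias by 15 instead of 16 and round ties down; MPEG-4's vop
 * rounding control alternates between the two so that repeated prediction
 * does not drift upwards by one LSB per generation. */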
#define H264_LOWPASS(OPNAME, OP, OP2) \
static av_unused void OPNAME ## h264_qpel2_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int h=2;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<h; i++)\
    {\
        OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
        OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
\
static av_unused void OPNAME ## h264_qpel2_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int w=2;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<w; i++)\
    {\
        const int srcB= src[-2*srcStride];\
        const int srcA= src[-1*srcStride];\
        const int src0= src[0 *srcStride];\
        const int src1= src[1 *srcStride];\
        const int src2= src[2 *srcStride];\
        const int src3= src[3 *srcStride];\
        const int src4= src[4 *srcStride];\
        OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
        dst++;\
        src++;\
    }\
}\
\
static av_unused void OPNAME ## h264_qpel2_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    const int h=2;\
    const int w=2;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    src -= 2*srcStride;\
    for(i=0; i<h+5; i++)\
    {\
        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\
        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\
        tmp+=tmpStride;\
        src+=srcStride;\
    }\
    tmp -= tmpStride*(h+5-2);\
    for(i=0; i<w; i++)\
    {\
        const int tmpB= tmp[-2*tmpStride];\
        const int tmpA= tmp[-1*tmpStride];\
        const int tmp0= tmp[0 *tmpStride];\
        const int tmp1= tmp[1 *tmpStride];\
        const int tmp2= tmp[2 *tmpStride];\
        const int tmp3= tmp[3 *tmpStride];\
        const int tmp4= tmp[4 *tmpStride];\
        OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
        OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
        dst++;\
        tmp++;\
    }\
}\
\
static void OPNAME ## h264_qpel4_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int h=4;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<h; i++)\
    {\
        OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
        OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\
        OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]));\
        OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]));\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
\
static void OPNAME ## h264_qpel4_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int w=4;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<w; i++)\
    {\
        const int srcB= src[-2*srcStride];\
        const int srcA= src[-1*srcStride];\
        const int src0= src[0 *srcStride];\
        const int src1= src[1 *srcStride];\
        const int src2= src[2 *srcStride];\
        const int src3= src[3 *srcStride];\
        const int src4= src[4 *srcStride];\
        const int src5= src[5 *srcStride];\
        const int src6= src[6 *srcStride];\
        OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
        OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
        OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
        dst++;\
        src++;\
    }\
}\
\
static void OPNAME ## h264_qpel4_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    const int h=4;\
    const int w=4;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    src -= 2*srcStride;\
    for(i=0; i<h+5; i++)\
    {\
        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\
        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\
        tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]);\
        tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]);\
        tmp+=tmpStride;\
        src+=srcStride;\
    }\
    tmp -= tmpStride*(h+5-2);\
    for(i=0; i<w; i++)\
    {\
        const int tmpB= tmp[-2*tmpStride];\
        const int tmpA= tmp[-1*tmpStride];\
        const int tmp0= tmp[0 *tmpStride];\
        const int tmp1= tmp[1 *tmpStride];\
        const int tmp2= tmp[2 *tmpStride];\
        const int tmp3= tmp[3 *tmpStride];\
        const int tmp4= tmp[4 *tmpStride];\
        const int tmp5= tmp[5 *tmpStride];\
        const int tmp6= tmp[6 *tmpStride];\
        OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
        OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
        OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
        OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
        dst++;\
        tmp++;\
    }\
}\
\
static void OPNAME ## h264_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int h=8;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<h; i++)\
    {\
        OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]));\
        OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]));\
        OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]));\
        OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]));\
        OP(dst[4], (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]));\
        OP(dst[5], (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]));\
        OP(dst[6], (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]));\
        OP(dst[7], (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]));\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
\
static void OPNAME ## h264_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int w=8;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<w; i++)\
    {\
        const int srcB= src[-2*srcStride];\
        const int srcA= src[-1*srcStride];\
        const int src0= src[0 *srcStride];\
        const int src1= src[1 *srcStride];\
        const int src2= src[2 *srcStride];\
        const int src3= src[3 *srcStride];\
        const int src4= src[4 *srcStride];\
        const int src5= src[5 *srcStride];\
        const int src6= src[6 *srcStride];\
        const int src7= src[7 *srcStride];\
        const int src8= src[8 *srcStride];\
        const int src9= src[9 *srcStride];\
        const int src10=src[10*srcStride];\
        OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
        OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
        OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
        OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*5 + (src2+src7));\
        OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*5 + (src3+src8));\
        OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*5 + (src4+src9));\
        OP(dst[7*dstStride], (src7+src8)*20 - (src6+src9)*5 + (src5+src10));\
        dst++;\
        src++;\
    }\
}\
\
static void OPNAME ## h264_qpel8_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    const int h=8;\
    const int w=8;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    src -= 2*srcStride;\
    for(i=0; i<h+5; i++)\
    {\
        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]);\
        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]);\
        tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]);\
        tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]);\
        tmp[4]= (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]);\
        tmp[5]= (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]);\
        tmp[6]= (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]);\
        tmp[7]= (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]);\
        tmp+=tmpStride;\
        src+=srcStride;\
    }\
    tmp -= tmpStride*(h+5-2);\
    for(i=0; i<w; i++)\
    {\
        const int tmpB= tmp[-2*tmpStride];\
        const int tmpA= tmp[-1*tmpStride];\
        const int tmp0= tmp[0 *tmpStride];\
        const int tmp1= tmp[1 *tmpStride];\
        const int tmp2= tmp[2 *tmpStride];\
        const int tmp3= tmp[3 *tmpStride];\
        const int tmp4= tmp[4 *tmpStride];\
        const int tmp5= tmp[5 *tmpStride];\
        const int tmp6= tmp[6 *tmpStride];\
        const int tmp7= tmp[7 *tmpStride];\
        const int tmp8= tmp[8 *tmpStride];\
        const int tmp9= tmp[9 *tmpStride];\
        const int tmp10=tmp[10*tmpStride];\
        OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
        OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
        OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
        OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
        OP2(dst[4*dstStride], (tmp4+tmp5)*20 - (tmp3+tmp6)*5 + (tmp2+tmp7));\
        OP2(dst[5*dstStride], (tmp5+tmp6)*20 - (tmp4+tmp7)*5 + (tmp3+tmp8));\
        OP2(dst[6*dstStride], (tmp6+tmp7)*20 - (tmp5+tmp8)*5 + (tmp4+tmp9));\
        OP2(dst[7*dstStride], (tmp7+tmp8)*20 - (tmp6+tmp9)*5 + (tmp5+tmp10));\
        dst++;\
        tmp++;\
    }\
}\
\
static void OPNAME ## h264_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    OPNAME ## h264_qpel8_v_lowpass(dst  , src  , dstStride, srcStride);\
    OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    OPNAME ## h264_qpel8_v_lowpass(dst  , src  , dstStride, srcStride);\
    OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\
}\
\
static void OPNAME ## h264_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    OPNAME ## h264_qpel8_h_lowpass(dst  , src  , dstStride, srcStride);\
    OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    OPNAME ## h264_qpel8_h_lowpass(dst  , src  , dstStride, srcStride);\
    OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\
}\
\
static void OPNAME ## h264_qpel16_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    OPNAME ## h264_qpel8_hv_lowpass(dst  , tmp  , src  , dstStride, tmpStride, srcStride);\
    OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    OPNAME ## h264_qpel8_hv_lowpass(dst  , tmp  , src  , dstStride, tmpStride, srcStride);\
    OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\
}\

#define H264_MC(OPNAME, SIZE) \
static void OPNAME ## h264_qpel ## SIZE ## _mc00_c (uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## pixels ## SIZE ## _c(dst, src, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc10_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, src, half, stride, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc20_c(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## h264_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc30_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, src+1, half, stride, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc01_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t half[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, full_mid, half, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc02_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    OPNAME ## h264_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc03_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t half[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, full_mid+SIZE, half, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc11_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc31_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc13_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc33_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc22_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t tmp[SIZE*(SIZE+5)];\
    OPNAME ## h264_qpel ## SIZE ## _hv_lowpass(dst, tmp, src, stride, SIZE, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc21_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc23_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc12_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfV[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc32_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfV[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
}\

#define op_avg(a, b)  a = (((a)+cm[((b) + 16)>>5]+1)>>1)
//#define op_avg2(a, b) a = (((a)*w1+cm[((b) + 16)>>5]*w2 + o + 64)>>7)
#define op_put(a, b)  a = cm[((b) + 16)>>5]
#define op2_avg(a, b)  a = (((a)+cm[((b) + 512)>>10]+1)>>1)
#define op2_put(a, b)  a = cm[((b) + 512)>>10]

H264_LOWPASS(put_       , op_put, op2_put)
H264_LOWPASS(avg_       , op_avg, op2_avg)
H264_MC(put_, 2)
H264_MC(put_, 4)
H264_MC(put_, 8)
H264_MC(put_, 16)
H264_MC(avg_, 4)
H264_MC(avg_, 8)
H264_MC(avg_, 16)

#undef op_avg
#undef op_put
#undef op2_avg
#undef op2_put
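
/* Illustrative arithmetic for OP/OP2 above: one 6-tap (1,-5,20,20,-5,1)
 * pass scales by 32, so OP renormalizes with (b+16)>>5. The hv path runs
 * the filter twice before clipping (tmp holds 16-bit intermediates), so
 * OP2 divides by 32*32 with (b+512)>>10. For a flat area of value 100:
 * each tmp is 3200, the vertical pass gives
 * (3200+3200)*20 - (3200+3200)*5 + (3200+3200) = 102400, and
 * (102400+512)>>10 = 100 again. */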
#define op_scale1(x)  block[x] = av_clip_uint8( (block[x]*weight + offset) >> log2_denom )
#define op_scale2(x)  dst[x] = av_clip_uint8( (src[x]*weights + dst[x]*weightd + offset) >> (log2_denom+1))
#define H264_WEIGHT(W,H) \
static void weight_h264_pixels ## W ## x ## H ## _c(uint8_t *block, int stride, int log2_denom, int weight, int offset){ \
    int y; \
    offset <<= log2_denom; \
    if(log2_denom) offset += 1<<(log2_denom-1); \
    for(y=0; y<H; y++, block += stride){ \
        op_scale1(0); \
        op_scale1(1); \
        if(W==2) continue; \
        op_scale1(2); \
        op_scale1(3); \
        if(W==4) continue; \
        op_scale1(4); \
        op_scale1(5); \
        op_scale1(6); \
        op_scale1(7); \
        if(W==8) continue; \
        op_scale1(8); \
        op_scale1(9); \
        op_scale1(10); \
        op_scale1(11); \
        op_scale1(12); \
        op_scale1(13); \
        op_scale1(14); \
        op_scale1(15); \
    } \
} \
static void biweight_h264_pixels ## W ## x ## H ## _c(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offset){ \
    int y; \
    offset = ((offset + 1) | 1) << log2_denom; \
    for(y=0; y<H; y++, dst += stride, src += stride){ \
        op_scale2(0); \
        op_scale2(1); \
        if(W==2) continue; \
        op_scale2(2); \
        op_scale2(3); \
        if(W==4) continue; \
        op_scale2(4); \
        op_scale2(5); \
        op_scale2(6); \
        op_scale2(7); \
        if(W==8) continue; \
        op_scale2(8); \
        op_scale2(9); \
        op_scale2(10); \
        op_scale2(11); \
        op_scale2(12); \
        op_scale2(13); \
        op_scale2(14); \
        op_scale2(15); \
    } \
}

H264_WEIGHT(16,16)
H264_WEIGHT(16,8)
H264_WEIGHT(8,16)
H264_WEIGHT(8,8)
H264_WEIGHT(8,4)
H264_WEIGHT(4,8)
H264_WEIGHT(4,4)
H264_WEIGHT(4,2)
H264_WEIGHT(2,4)
H264_WEIGHT(2,2)

#undef op_scale1
#undef op_scale2
#undef H264_WEIGHT
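
/* Worked example for the weighting above: with weight=3, log2_denom=1 and
 * offset=0, the setup code turns offset into (0<<1) + (1<<0) = 1 and a
 * pixel of 100 becomes av_clip_uint8((100*3 + 1)>>1) = 150, i.e. rounded
 * explicit weighted prediction. In the biweight case
 * ((offset+1)|1) << log2_denom forces an odd bias so that the final
 * >> (log2_denom+1) also rounds to nearest. */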
static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
    int i;

    for(i=0; i<h; i++){
        dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4];
        dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4];
        dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4];
        dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4];
        dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4];
        dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4];
        dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4];
        dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4];
        dst+=dstStride;
        src+=srcStride;
    }
}
#ifdef CONFIG_CAVS_DECODER
/* AVS specific */
void ff_cavsdsp_init(DSPContext* c, AVCodecContext *avctx);

void ff_put_cavs_qpel8_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
    put_pixels8_c(dst, src, stride, 8);
}
void ff_avg_cavs_qpel8_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
    avg_pixels8_c(dst, src, stride, 8);
}
void ff_put_cavs_qpel16_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
    put_pixels16_c(dst, src, stride, 16);
}
void ff_avg_cavs_qpel16_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
    avg_pixels16_c(dst, src, stride, 16);
}
#endif /* CONFIG_CAVS_DECODER */

#if defined(CONFIG_VC1_DECODER) || defined(CONFIG_WMV3_DECODER)
/* VC-1 specific */
void ff_vc1dsp_init(DSPContext* c, AVCodecContext *avctx);

void ff_put_vc1_mspel_mc00_c(uint8_t *dst, uint8_t *src, int stride, int rnd) {
    put_pixels8_c(dst, src, stride, 8);
}
#endif /* CONFIG_VC1_DECODER||CONFIG_WMV3_DECODER */

void ff_intrax8dsp_init(DSPContext* c, AVCodecContext *avctx);

/* H264 specific */
void ff_h264dspenc_init(DSPContext* c, AVCodecContext *avctx);
static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
    int i;

    for(i=0; i<w; i++){
        const int src_1= src[ -srcStride];
        const int src0 = src[0          ];
        const int src1 = src[  srcStride];
        const int src2 = src[2*srcStride];
        const int src3 = src[3*srcStride];
        const int src4 = src[4*srcStride];
        const int src5 = src[5*srcStride];
        const int src6 = src[6*srcStride];
        const int src7 = src[7*srcStride];
        const int src8 = src[8*srcStride];
        const int src9 = src[9*srcStride];
        dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
        dst[1*dstStride]= cm[(9*(src1 + src2) - (src0  + src3) + 8)>>4];
        dst[2*dstStride]= cm[(9*(src2 + src3) - (src1  + src4) + 8)>>4];
        dst[3*dstStride]= cm[(9*(src3 + src4) - (src2  + src5) + 8)>>4];
        dst[4*dstStride]= cm[(9*(src4 + src5) - (src3  + src6) + 8)>>4];
        dst[5*dstStride]= cm[(9*(src5 + src6) - (src4  + src7) + 8)>>4];
        dst[6*dstStride]= cm[(9*(src6 + src7) - (src5  + src8) + 8)>>4];
        dst[7*dstStride]= cm[(9*(src7 + src8) - (src6  + src9) + 8)>>4];
        src++;
        dst++;
    }
}
static void put_mspel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){
    put_pixels8_c(dst, src, stride, 8);
}

static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t half[64];
    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
    put_pixels8_l2(dst, src, half, stride, stride, 8, 8);
}

static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){
    wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
}

static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t half[64];
    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
    put_pixels8_l2(dst, src+1, half, stride, stride, 8, 8);
}

static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){
    wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
}

static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];
    uint8_t halfV[64];
    uint8_t halfHV[64];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(halfV, src, 8, stride, 8);
    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
    put_pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);
}
static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];
    uint8_t halfV[64];
    uint8_t halfHV[64];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(halfV, src+1, 8, stride, 8);
    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
    put_pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);
}
static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(dst, halfH+8, stride, 8, 8);
}
static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale){
    if(ENABLE_ANY_H263) {
    int x;
    const int strength= ff_h263_loop_filter_strength[qscale];

    for(x=0; x<8; x++){
        int d1, d2, ad1;
        int p0= src[x-2*stride];
        int p1= src[x-1*stride];
        int p2= src[x+0*stride];
        int p3= src[x+1*stride];
        int d = (p0 - p3 + 4*(p2 - p1)) / 8;

        if     (d<-2*strength) d1= 0;
        else if(d<-  strength) d1=-2*strength - d;
        else if(d<   strength) d1= d;
        else if(d< 2*strength) d1= 2*strength - d;
        else                   d1= 0;

        p1 += d1;
        p2 -= d1;
        if(p1&256) p1= ~(p1>>31);
        if(p2&256) p2= ~(p2>>31);

        src[x-1*stride] = p1;
        src[x+0*stride] = p2;

        ad1= FFABS(d1)>>1;

        d2= av_clip((p0-p3)/4, -ad1, ad1);

        src[x-2*stride] = p0 - d2;
        src[x+  stride] = p3 + d2;
    }
    }
}

static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){
    if(ENABLE_ANY_H263) {
    int y;
    const int strength= ff_h263_loop_filter_strength[qscale];

    for(y=0; y<8; y++){
        int d1, d2, ad1;
        int p0= src[y*stride-2];
        int p1= src[y*stride-1];
        int p2= src[y*stride+0];
        int p3= src[y*stride+1];
        int d = (p0 - p3 + 4*(p2 - p1)) / 8;

        if     (d<-2*strength) d1= 0;
        else if(d<-  strength) d1=-2*strength - d;
        else if(d<   strength) d1= d;
        else if(d< 2*strength) d1= 2*strength - d;
        else                   d1= 0;

        p1 += d1;
        p2 -= d1;
        if(p1&256) p1= ~(p1>>31);
        if(p2&256) p2= ~(p2>>31);

        src[y*stride-1] = p1;
        src[y*stride+0] = p2;

        ad1= FFABS(d1)>>1;

        d2= av_clip((p0-p3)/4, -ad1, ad1);

        src[y*stride-2] = p0 - d2;
        src[y*stride+1] = p3 + d2;
    }
    }
}
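
/* Numeric sketch of the ramp above (example values): with strength=4 and
 * p0,p1,p2,p3 = 70,70,80,80 the step estimate is
 * d = (70-80 + 4*(80-70))/8 = 3, which is below strength, so d1 = d and the
 * inner pair moves together (p1=73, p2=77); larger |d| is treated as a real
 * edge and d1 ramps back to 0 beyond 2*strength. The expressions
 * "if(p1&256) p1= ~(p1>>31);" are a branchless clip to 0..255 for values
 * that can only lie in -256..511. */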
static void h261_loop_filter_c(uint8_t *src, int stride){
    int x,y,xy,yz;
    int temp[64];

    for(x=0; x<8; x++){
        temp[x      ] = 4*src[x           ];
        temp[x + 7*8] = 4*src[x + 7*stride];
    }
    for(y=1; y<7; y++){
        for(x=0; x<8; x++){
            xy = y * stride + x;
            yz = y * 8 + x;
            temp[yz] = src[xy - stride] + 2*src[xy] + src[xy + stride];
        }
    }

    for(y=0; y<8; y++){
        src[  y*stride] = (temp[  y*8] + 2)>>2;
        src[7+y*stride] = (temp[7+y*8] + 2)>>2;
        for(x=1; x<7; x++){
            xy = y * stride + x;
            yz = y * 8 + x;
            src[xy] = (temp[yz-1] + 2*temp[yz] + temp[yz+1] + 8)>>4;
        }
    }
}
static inline void h264_loop_filter_luma_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0)
{
    int i, d;
    for( i = 0; i < 4; i++ ) {
        if( tc0[i] < 0 ) {
            pix += 4*ystride;
            continue;
        }
        for( d = 0; d < 4; d++ ) {
            const int p0 = pix[-1*xstride];
            const int p1 = pix[-2*xstride];
            const int p2 = pix[-3*xstride];
            const int q0 = pix[0];
            const int q1 = pix[1*xstride];
            const int q2 = pix[2*xstride];

            if( FFABS( p0 - q0 ) < alpha &&
                FFABS( p1 - p0 ) < beta &&
                FFABS( q1 - q0 ) < beta ) {

                int tc = tc0[i];
                int i_delta;

                if( FFABS( p2 - p0 ) < beta ) {
                    pix[-2*xstride] = p1 + av_clip( (( p2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - p1, -tc0[i], tc0[i] );
                    tc++;
                }
                if( FFABS( q2 - q0 ) < beta ) {
                    pix[   xstride] = q1 + av_clip( (( q2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - q1, -tc0[i], tc0[i] );
                    tc++;
                }

                i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
                pix[-xstride] = av_clip_uint8( p0 + i_delta );    /* p0' */
                pix[0]        = av_clip_uint8( q0 - i_delta );    /* q0' */
            }
            pix += ystride;
        }
    }
}
static void h264_v_loop_filter_luma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
    h264_loop_filter_luma_c(pix, stride, 1, alpha, beta, tc0);
}
static void h264_h_loop_filter_luma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
    h264_loop_filter_luma_c(pix, 1, stride, alpha, beta, tc0);
}

static inline void h264_loop_filter_chroma_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0)
{
    int i, d;
    for( i = 0; i < 4; i++ ) {
        const int tc = tc0[i];
        if( tc <= 0 ) {
            pix += 2*ystride;
            continue;
        }
        for( d = 0; d < 2; d++ ) {
            const int p0 = pix[-1*xstride];
            const int p1 = pix[-2*xstride];
            const int q0 = pix[0];
            const int q1 = pix[1*xstride];

            if( FFABS( p0 - q0 ) < alpha &&
                FFABS( p1 - p0 ) < beta &&
                FFABS( q1 - q0 ) < beta ) {

                int delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );

                pix[-xstride] = av_clip_uint8( p0 + delta );    /* p0' */
                pix[0]        = av_clip_uint8( q0 - delta );    /* q0' */
            }
            pix += ystride;
        }
    }
}
static void h264_v_loop_filter_chroma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
    h264_loop_filter_chroma_c(pix, stride, 1, alpha, beta, tc0);
}
static void h264_h_loop_filter_chroma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
    h264_loop_filter_chroma_c(pix, 1, stride, alpha, beta, tc0);
}

static inline void h264_loop_filter_chroma_intra_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta)
{
    int d;
    for( d = 0; d < 8; d++ ) {
        const int p0 = pix[-1*xstride];
        const int p1 = pix[-2*xstride];
        const int q0 = pix[0];
        const int q1 = pix[1*xstride];

        if( FFABS( p0 - q0 ) < alpha &&
            FFABS( p1 - p0 ) < beta &&
            FFABS( q1 - q0 ) < beta ) {

            pix[-xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;   /* p0' */
            pix[0]        = ( 2*q1 + q0 + p1 + 2 ) >> 2;   /* q0' */
        }
        pix += ystride;
    }
}
static void h264_v_loop_filter_chroma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
{
    h264_loop_filter_chroma_intra_c(pix, stride, 1, alpha, beta);
}
static void h264_h_loop_filter_chroma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
{
    h264_loop_filter_chroma_intra_c(pix, 1, stride, alpha, beta);
}
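
/* Worked example of the luma delta above: for p1,p0,q0,q1 = 60,70,90,100
 * the raw correction is (((90-70)<<2) + (60-100) + 4)>>3 = 5, clipped to
 * the per-edge +/-tc budget before being added to p0 and subtracted from
 * q0. tc is widened by one for each side that also filtered its second
 * pixel (the p2/q2 beta tests), so smoother sides may spend slightly more
 * correction on the edge itself. */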
static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s, i;

    s = 0;
    for(i=0;i<h;i++) {
        s += abs(pix1[0] - pix2[0]);
        s += abs(pix1[1] - pix2[1]);
        s += abs(pix1[2] - pix2[2]);
        s += abs(pix1[3] - pix2[3]);
        s += abs(pix1[4] - pix2[4]);
        s += abs(pix1[5] - pix2[5]);
        s += abs(pix1[6] - pix2[6]);
        s += abs(pix1[7] - pix2[7]);
        s += abs(pix1[8] - pix2[8]);
        s += abs(pix1[9] - pix2[9]);
        s += abs(pix1[10] - pix2[10]);
        s += abs(pix1[11] - pix2[11]);
        s += abs(pix1[12] - pix2[12]);
        s += abs(pix1[13] - pix2[13]);
        s += abs(pix1[14] - pix2[14]);
        s += abs(pix1[15] - pix2[15]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}

static int pix_abs16_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s, i;

    s = 0;
    for(i=0;i<h;i++) {
        s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
        s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
        s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
        s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
        s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
        s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
        s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
        s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
        s += abs(pix1[8] - avg2(pix2[8], pix2[9]));
        s += abs(pix1[9] - avg2(pix2[9], pix2[10]));
        s += abs(pix1[10] - avg2(pix2[10], pix2[11]));
        s += abs(pix1[11] - avg2(pix2[11], pix2[12]));
        s += abs(pix1[12] - avg2(pix2[12], pix2[13]));
        s += abs(pix1[13] - avg2(pix2[13], pix2[14]));
        s += abs(pix1[14] - avg2(pix2[14], pix2[15]));
        s += abs(pix1[15] - avg2(pix2[15], pix2[16]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}

static int pix_abs16_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s, i;
    uint8_t *pix3 = pix2 + line_size;

    s = 0;
    for(i=0;i<h;i++) {
        s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
        s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
        s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
        s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
        s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
        s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
        s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
        s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
        s += abs(pix1[8] - avg2(pix2[8], pix3[8]));
        s += abs(pix1[9] - avg2(pix2[9], pix3[9]));
        s += abs(pix1[10] - avg2(pix2[10], pix3[10]));
        s += abs(pix1[11] - avg2(pix2[11], pix3[11]));
        s += abs(pix1[12] - avg2(pix2[12], pix3[12]));
        s += abs(pix1[13] - avg2(pix2[13], pix3[13]));
        s += abs(pix1[14] - avg2(pix2[14], pix3[14]));
        s += abs(pix1[15] - avg2(pix2[15], pix3[15]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return s;
}

static int pix_abs16_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s, i;
    uint8_t *pix3 = pix2 + line_size;

    s = 0;
    for(i=0;i<h;i++) {
        s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
        s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
        s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
        s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
        s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
        s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
        s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
        s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
        s += abs(pix1[8] - avg4(pix2[8], pix2[9], pix3[8], pix3[9]));
        s += abs(pix1[9] - avg4(pix2[9], pix2[10], pix3[9], pix3[10]));
        s += abs(pix1[10] - avg4(pix2[10], pix2[11], pix3[10], pix3[11]));
        s += abs(pix1[11] - avg4(pix2[11], pix2[12], pix3[11], pix3[12]));
        s += abs(pix1[12] - avg4(pix2[12], pix2[13], pix3[12], pix3[13]));
        s += abs(pix1[13] - avg4(pix2[13], pix2[14], pix3[13], pix3[14]));
        s += abs(pix1[14] - avg4(pix2[14], pix2[15], pix3[14], pix3[15]));
        s += abs(pix1[15] - avg4(pix2[15], pix2[16], pix3[15], pix3[16]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return s;
}
static inline int pix_abs8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s, i;

    s = 0;
    for(i=0;i<h;i++) {
        s += abs(pix1[0] - pix2[0]);
        s += abs(pix1[1] - pix2[1]);
        s += abs(pix1[2] - pix2[2]);
        s += abs(pix1[3] - pix2[3]);
        s += abs(pix1[4] - pix2[4]);
        s += abs(pix1[5] - pix2[5]);
        s += abs(pix1[6] - pix2[6]);
        s += abs(pix1[7] - pix2[7]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}

static int pix_abs8_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s, i;

    s = 0;
    for(i=0;i<h;i++) {
        s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
        s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
        s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
        s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
        s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
        s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
        s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
        s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}

static int pix_abs8_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s, i;
    uint8_t *pix3 = pix2 + line_size;

    s = 0;
    for(i=0;i<h;i++) {
        s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
        s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
        s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
        s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
        s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
        s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
        s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
        s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return s;
}

static int pix_abs8_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s, i;
    uint8_t *pix3 = pix2 + line_size;

    s = 0;
    for(i=0;i<h;i++) {
        s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
        s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
        s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
        s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
        s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
        s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
        s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
        s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return s;
}
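
/* avg2() and avg4() are defined earlier in this file as the usual rounded
 * averages (a+b+1)>>1 and (a+b+c+d+2)>>2, so the _x2/_y2/_xy2 variants
 * above score half-pel motion candidates on the fly, matching what the
 * half-pel MC would produce (e.g. avg2(100,101) = 101) without writing an
 * interpolated block to a temporary buffer first. */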
static int nsse16_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
    MpegEncContext *c = v;
    int score1=0;
    int score2=0;
    int x,y;

    for(y=0; y<h; y++){
        for(x=0; x<16; x++){
            score1+= (s1[x] - s2[x])*(s1[x] - s2[x]);
        }
        if(y+1<h){
            for(x=0; x<15; x++){
                score2+= FFABS(  s1[x  ] - s1[x  +stride]
                               - s1[x+1] + s1[x+1+stride])
                        -FFABS(  s2[x  ] - s2[x  +stride]
                               - s2[x+1] + s2[x+1+stride]);
            }
        }
        s1+= stride;
        s2+= stride;
    }

    if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
    else  return score1 + FFABS(score2)*8;
}

static int nsse8_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
    MpegEncContext *c = v;
    int score1=0;
    int score2=0;
    int x,y;

    for(y=0; y<h; y++){
        for(x=0; x<8; x++){
            score1+= (s1[x] - s2[x])*(s1[x] - s2[x]);
        }
        if(y+1<h){
            for(x=0; x<7; x++){
                score2+= FFABS(  s1[x  ] - s1[x  +stride]
                               - s1[x+1] + s1[x+1+stride])
                        -FFABS(  s2[x  ] - s2[x  +stride]
                               - s2[x+1] + s2[x+1+stride]);
            }
        }
        s1+= stride;
        s2+= stride;
    }

    if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
    else  return score1 + FFABS(score2)*8;
}
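
/* nsse is sse plus a penalty on the change in local 2x2 gradients between
 * source and reconstruction, scaled by avctx->nsse_weight (8 when no
 * context is available): of two candidates with equal sse, the one that
 * keeps the original texture/noise structure scores better, biasing the
 * encoder against smearing detail. */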
static int try_8x8basis_c(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){
    int i;
    unsigned int sum=0;

    for(i=0; i<8*8; i++){
        int b= rem[i] + ((basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT));
        int w= weight[i];
        b>>= RECON_SHIFT;
        assert(-512<b && b<512);

        sum += (w*b)*(w*b)>>4;
    }
    return sum>>2;
}

static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale){
    int i;

    for(i=0; i<8*8; i++){
        rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT);
    }
}
/**
 * permutes an 8x8 block.
 * @param block the block which will be permuted according to the given permutation vector
 * @param permutation the permutation vector
 * @param last the last non zero coefficient in scantable order, used to speed the permutation up
 * @param scantable the used scantable, this is only used to speed the permutation up, the block is not
 * (inverse) permutated to scantable order!
 */
void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last)
{
    int i;
    DCTELEM temp[64];

    if(last<=0) return;
    //if(permutation[1]==1) return; //FIXME it is ok but not clean and might fail for some permutations

    for(i=0; i<=last; i++){
        const int j= scantable[i];
        temp[j]= block[j];
        block[j]=0;
    }

    for(i=0; i<=last; i++){
        const int j= scantable[i];
        const int perm_j= permutation[j];
        block[perm_j]= temp[j];
    }
}
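
/* Hedged usage sketch (call shape mirrors the mpegvideo.c callers, shown
 * for illustration only):
 *
 *     ff_block_permute(block, s->dsp.idct_permutation,
 *                      s->intra_scantable.scantable, last_index);
 *
 * i.e. after quantization the coefficients are rearranged into whatever
 * layout the selected IDCT expects, touching only positions that can be
 * non-zero according to last_index. */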
static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){
    return 0;
}

void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){
    int i;

    memset(cmp, 0, sizeof(void*)*5);

    for(i=0; i<5; i++){
        switch(type&0xFF){
        case FF_CMP_SAD:
            cmp[i]= c->sad[i];
            break;
        case FF_CMP_SATD:
            cmp[i]= c->hadamard8_diff[i];
            break;
        case FF_CMP_SSE:
            cmp[i]= c->sse[i];
            break;
        case FF_CMP_DCT:
            cmp[i]= c->dct_sad[i];
            break;
        case FF_CMP_DCT264:
            cmp[i]= c->dct264_sad[i];
            break;
        case FF_CMP_DCTMAX:
            cmp[i]= c->dct_max[i];
            break;
        case FF_CMP_PSNR:
            cmp[i]= c->quant_psnr[i];
            break;
        case FF_CMP_BIT:
            cmp[i]= c->bit[i];
            break;
        case FF_CMP_RD:
            cmp[i]= c->rd[i];
            break;
        case FF_CMP_VSAD:
            cmp[i]= c->vsad[i];
            break;
        case FF_CMP_VSSE:
            cmp[i]= c->vsse[i];
            break;
        case FF_CMP_ZERO:
            cmp[i]= zero_cmp;
            break;
        case FF_CMP_NSSE:
            cmp[i]= c->nsse[i];
            break;
#ifdef CONFIG_SNOW_ENCODER
        case FF_CMP_W53:
            cmp[i]= c->w53[i];
            break;
        case FF_CMP_W97:
            cmp[i]= c->w97[i];
            break;
#endif
        default:
            av_log(NULL, AV_LOG_ERROR,"internal error in cmp function selection\n");
        }
    }
}
/**
 * memset(blocks, 0, sizeof(DCTELEM)*6*64)
 */
static void clear_blocks_c(DCTELEM *blocks)
{
    memset(blocks, 0, sizeof(DCTELEM)*6*64);
}

static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){
    long i;
    for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
        long a = *(long*)(src+i);
        long b = *(long*)(dst+i);
        *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80);
    }
    for(; i<w; i++)
        dst[i+0] += src[i+0];
}

static void add_bytes_l2_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
    long i;
    for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
        long a = *(long*)(src1+i);
        long b = *(long*)(src2+i);
        *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80);
    }
    for(; i<w; i++)
        dst[i] = src1[i]+src2[i];
}
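
/* The word-wide expression above is a SWAR trick: (a&pb_7f) + (b&pb_7f)
 * adds the low 7 bits of every byte so no carry can cross a byte boundary,
 * then ((a^b)&pb_80) restores each byte's top bit with a carry-less xor.
 * One-word sanity check (hypothetical test, not built): */
#if 0
static void check_swar_add(void){
    long a= 0xff01, b= 0x0102;  /* packed bytes 0xff,0x01 and 0x01,0x02 */
    long r= ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80);
    assert(r == 0x0003);        /* 0xff+0x01 = 0x00 (mod 256), 0x01+0x02 = 0x03 */
}
#endif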
static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
    long i;
#ifndef HAVE_FAST_UNALIGNED
    if((long)src2 & (sizeof(long)-1)){
        for(i=0; i+7<w; i+=8){
            dst[i+0] = src1[i+0]-src2[i+0];
            dst[i+1] = src1[i+1]-src2[i+1];
            dst[i+2] = src1[i+2]-src2[i+2];
            dst[i+3] = src1[i+3]-src2[i+3];
            dst[i+4] = src1[i+4]-src2[i+4];
            dst[i+5] = src1[i+5]-src2[i+5];
            dst[i+6] = src1[i+6]-src2[i+6];
            dst[i+7] = src1[i+7]-src2[i+7];
        }
    }else
#endif
    {
        for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
            long a = *(long*)(src1+i);
            long b = *(long*)(src2+i);
            *(long*)(dst+i) = ((a|pb_80) - (b&pb_7f)) ^ ((a^b^pb_80)&pb_80);
        }
    }
    for(; i<w; i++)
        dst[i+0] = src1[i+0]-src2[i+0];
}
static void sub_hfyu_median_prediction_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w, int *left, int *left_top){
    int i;
    uint8_t l, lt;

    l= *left;
    lt= *left_top;

    for(i=0; i<w; i++){
        const int pred= mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF);
        lt= src1[i];
        l= src2[i];
        dst[i]= l - pred;
    }

    *left= l;
    *left_top= lt;
}
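
/* mid_pred() returns the median of its three arguments, so the prediction
 * above is the classic "median" predictor from HuffYUV: median of left,
 * top and the gradient guess left+top-topleft (wrapped to a byte). E.g.
 * l=10, src1[i]=20, lt=12 gives the guess 18 and mid_pred(10,20,18) = 18;
 * only the residual src2[i] - pred is written to dst. */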
#define BUTTERFLY2(o1,o2,i1,i2) \
o1= (i1)+(i2);\
o2= (i1)-(i2);

#define BUTTERFLY1(x,y) \
{\
    int a,b;\
    a= x;\
    b= y;\
    x= a+b;\
    y= a-b;\
}

#define BUTTERFLYA(x,y) (FFABS((x)+(y)) + FFABS((x)-(y)))

static int hadamard8_diff8x8_c(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){
    int i;
    int temp[64];
    int sum=0;

    assert(h==8);

    for(i=0; i<8; i++){
        //FIXME try pointer walks
        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0]-dst[stride*i+0],src[stride*i+1]-dst[stride*i+1]);
        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2]-dst[stride*i+2],src[stride*i+3]-dst[stride*i+3]);
        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4]-dst[stride*i+4],src[stride*i+5]-dst[stride*i+5]);
        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6]-dst[stride*i+6],src[stride*i+7]-dst[stride*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
        BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+5], temp[8*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
        BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
    }

    for(i=0; i<8; i++){
        BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
        BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
        BUTTERFLY1(temp[8*6+i], temp[8*7+i]);

        BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
        BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
        BUTTERFLY1(temp[8*5+i], temp[8*7+i]);

        sum +=
             BUTTERFLYA(temp[8*0+i], temp[8*4+i])
            +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
            +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
            +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
    }
#if 0
static int maxi=0;
if(sum>maxi){
    maxi=sum;
    printf("MAX:%d\n", maxi);
}
#endif
    return sum;
}
static int hadamard8_intra8x8_c(/*MpegEncContext*/ void *s, uint8_t *src, uint8_t *dummy, int stride, int h){
    int i;
    int temp[64];
    int sum=0;

    assert(h==8);

    for(i=0; i<8; i++){
        //FIXME try pointer walks
        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0],src[stride*i+1]);
        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2],src[stride*i+3]);
        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4],src[stride*i+5]);
        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6],src[stride*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
        BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+5], temp[8*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
        BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
    }

    for(i=0; i<8; i++){
        BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
        BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
        BUTTERFLY1(temp[8*6+i], temp[8*7+i]);

        BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
        BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
        BUTTERFLY1(temp[8*5+i], temp[8*7+i]);

        sum +=
             BUTTERFLYA(temp[8*0+i], temp[8*4+i])
            +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
            +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
            +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
    }

    sum -= FFABS(temp[8*0] + temp[8*4]); // -mean

    return sum;
}
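
/* Both functions above are SATD metrics: an 8x8 Hadamard transform built
 * from the butterfly macros followed by a sum of absolute transform
 * coefficients, a cheap stand-in for DCT-domain distortion. The intra
 * variant subtracts FFABS(temp[8*0] + temp[8*4]) (the "-mean" note), i.e.
 * the DC term, so the block mean does not dominate intra mode decisions. */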
static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    DECLARE_ALIGNED_16(uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]);
    DCTELEM * const temp= (DCTELEM*)aligned_temp;

    assert(h==8);

    s->dsp.diff_pixels(temp, src1, src2, stride);
    s->dsp.fdct(temp);
    return s->dsp.sum_abs_dctelem(temp);
}
#define DCT8_1D {\
    const int s07 = SRC(0) + SRC(7);\
    const int s16 = SRC(1) + SRC(6);\
    const int s25 = SRC(2) + SRC(5);\
    const int s34 = SRC(3) + SRC(4);\
    const int a0 = s07 + s34;\
    const int a1 = s16 + s25;\
    const int a2 = s07 - s34;\
    const int a3 = s16 - s25;\
    const int d07 = SRC(0) - SRC(7);\
    const int d16 = SRC(1) - SRC(6);\
    const int d25 = SRC(2) - SRC(5);\
    const int d34 = SRC(3) - SRC(4);\
    const int a4 = d16 + d25 + (d07 + (d07>>1));\
    const int a5 = d07 - d34 - (d25 + (d25>>1));\
    const int a6 = d07 + d34 - (d16 + (d16>>1));\
    const int a7 = d16 - d25 + (d34 + (d34>>1));\
    DST(0,  a0 + a1     ) ;\
    DST(1,  a4 + (a7>>2)) ;\
    DST(2,  a2 + (a3>>1)) ;\
    DST(3,  a5 + (a6>>2)) ;\
    DST(4,  a0 - a1     ) ;\
    DST(5,  a6 - (a5>>2)) ;\
    DST(6, (a2>>1) - a3 ) ;\
    DST(7, (a4>>2) - a7 ) ;\
}
static int dct264_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    DCTELEM dct[8][8];
    int i;
    int sum=0;

    s->dsp.diff_pixels(dct[0], src1, src2, stride);

#define SRC(x) dct[i][x]
#define DST(x,v) dct[i][x]= v
    for( i = 0; i < 8; i++ )
        DCT8_1D
#undef SRC
#undef DST

#define SRC(x) dct[x][i]
#define DST(x,v) sum += FFABS(v)
    for( i = 0; i < 8; i++ )
        DCT8_1D
#undef SRC
#undef DST
    return sum;
}
, uint8_t *src1
, uint8_t *src2
, int stride
, int h
){
3654 MpegEncContext
* const s
= (MpegEncContext
*)c
;
3655 DECLARE_ALIGNED_8(uint64_t, aligned_temp
[sizeof(DCTELEM
)*64/8]);
3656 DCTELEM
* const temp
= (DCTELEM
*)aligned_temp
;
3661 s
->dsp
.diff_pixels(temp
, src1
, src2
, stride
);
3665 sum
= FFMAX(sum
, FFABS(temp
[i
]));
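/* Quantization-noise metric: run the difference block through the encoder's
 * quantize/dequantize/IDCT round trip and return the squared error against
 * the unquantized difference. */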
static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    DECLARE_ALIGNED_8 (uint64_t, aligned_temp[sizeof(DCTELEM)*64*2/8]);
    DCTELEM * const temp= (DCTELEM*)aligned_temp;
    DCTELEM * const bak = ((DCTELEM*)aligned_temp)+64;
    int sum=0, i;

    assert(h==8);
    s->mb_intra=0;

    s->dsp.diff_pixels(temp, src1, src2, stride);

    memcpy(bak, temp, 64*sizeof(DCTELEM));

    s->block_last_index[0/*FIXME*/]= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
    s->dct_unquantize_inter(s, temp, 0, s->qscale);
    ff_simple_idct(temp); //FIXME

    for(i=0; i<64; i++)
        sum+= (temp[i]-bak[i])*(temp[i]-bak[i]);

    return sum;
}
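/* Rate-distortion metric: quantize the difference block, count the VLC bits
 * needed to code it, reconstruct, and return SSE distortion plus the bit
 * cost weighted by qscale^2 * 109/128. */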
static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    const uint8_t *scantable= s->intra_scantable.permutated;
    DECLARE_ALIGNED_8 (uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]);
    DECLARE_ALIGNED_8 (uint64_t, aligned_bak[stride]);
    DCTELEM * const temp= (DCTELEM*)aligned_temp;
    uint8_t * const bak= (uint8_t*)aligned_bak;
    int i, last, run, bits, level, distortion, start_i;
    const int esc_length= s->ac_esc_length;
    uint8_t * length;
    uint8_t * last_length;

    assert(h==8);

    for(i=0; i<8; i++){
        ((uint32_t*)(bak + i*stride))[0]= ((uint32_t*)(src2 + i*stride))[0];
        ((uint32_t*)(bak + i*stride))[1]= ((uint32_t*)(src2 + i*stride))[1];
    }

    s->dsp.diff_pixels(temp, src1, src2, stride);

    s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);

    bits=0;

    if (s->mb_intra) {
        start_i = 1;
        length     = s->intra_ac_vlc_length;
        last_length= s->intra_ac_vlc_last_length;
        bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
    } else {
        start_i = 0;
        length     = s->inter_ac_vlc_length;
        last_length= s->inter_ac_vlc_last_length;
    }

    if(last>=start_i){
        run=0;
        for(i=start_i; i<last; i++){
            int j= scantable[i];
            level= temp[j];

            if(level){
                level+=64;
                if((level&(~127)) == 0){
                    bits+= length[UNI_AC_ENC_INDEX(run, level)];
                }else
                    bits+= esc_length;
                run=0;
            }else
                run++;
        }
        i= scantable[last];

        level= temp[i] + 64;

        assert(level - 64);

        if((level&(~127)) == 0){
            bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
        }else
            bits+= esc_length;
    }

    if(last>=0){
        if(s->mb_intra)
            s->dct_unquantize_intra(s, temp, 0, s->qscale);
        else
            s->dct_unquantize_inter(s, temp, 0, s->qscale);
    }

    s->dsp.idct_add(bak, stride, temp);

    distortion= s->dsp.sse[1](NULL, bak, src1, stride, 8);

    return distortion + ((bits*s->qscale*s->qscale*109 + 64)>>7);
}
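/* Rate-only metric: same quantization and VLC accounting as rd8x8_c, but
 * returns just the bit count, without reconstructing or measuring error. */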
static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    const uint8_t *scantable= s->intra_scantable.permutated;
    DECLARE_ALIGNED_8 (uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]);
    DCTELEM * const temp= (DCTELEM*)aligned_temp;
    int i, last, run, bits, level, start_i;
    const int esc_length= s->ac_esc_length;
    uint8_t * length;
    uint8_t * last_length;

    assert(h==8);

    s->dsp.diff_pixels(temp, src1, src2, stride);

    s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);

    bits=0;

    if (s->mb_intra) {
        start_i = 1;
        length     = s->intra_ac_vlc_length;
        last_length= s->intra_ac_vlc_last_length;
        bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
    } else {
        start_i = 0;
        length     = s->inter_ac_vlc_length;
        last_length= s->inter_ac_vlc_last_length;
    }

    if(last>=start_i){
        run=0;
        for(i=start_i; i<last; i++){
            int j= scantable[i];
            level= temp[j];

            if(level){
                level+=64;
                if((level&(~127)) == 0){
                    bits+= length[UNI_AC_ENC_INDEX(run, level)];
                }else
                    bits+= esc_length;
                run=0;
            }else
                run++;
        }
        i= scantable[last];

        level= temp[i] + 64;

        assert(level - 64);

        if((level&(~127)) == 0){
            bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
        }else
            bits+= esc_length;
    }

    return bits;
}
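/* Vertical-gradient metrics: each pixel is compared with the one directly
 * below it. The intra variants measure the vertical activity of one block;
 * the two-source variants measure how the vertical structure of two blocks
 * differs. */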
static int vsad_intra16_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){
    int score=0;
    int x,y;

    for(y=1; y<h; y++){
        for(x=0; x<16; x+=4){
            score+= FFABS(s[x  ] - s[x  +stride]) + FFABS(s[x+1] - s[x+1+stride])
                   +FFABS(s[x+2] - s[x+2+stride]) + FFABS(s[x+3] - s[x+3+stride]);
        }
        s+= stride;
    }

    return score;
}

static int vsad16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
    int score=0;
    int x,y;

    for(y=1; y<h; y++){
        for(x=0; x<16; x++){
            score+= FFABS(s1[x] - s2[x] - s1[x+stride] + s2[x+stride]);
        }
        s1+= stride;
        s2+= stride;
    }

    return score;
}

#define SQ(a) ((a)*(a))
static int vsse_intra16_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){
    int score=0;
    int x,y;

    for(y=1; y<h; y++){
        for(x=0; x<16; x+=4){
            score+= SQ(s[x  ] - s[x  +stride]) + SQ(s[x+1] - s[x+1+stride])
                   +SQ(s[x+2] - s[x+2+stride]) + SQ(s[x+3] - s[x+3+stride]);
        }
        s+= stride;
    }

    return score;
}

static int vsse16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
    int score=0;
    int x,y;

    for(y=1; y<h; y++){
        for(x=0; x<16; x++){
            score+= SQ(s1[x] - s2[x] - s1[x+stride] + s2[x+stride]);
        }
        s1+= stride;
        s2+= stride;
    }

    return score;
}

static int ssd_int8_vs_int16_c(const int8_t *pix1, const int16_t *pix2,
                               int size){
    int score=0;
    int i;

    for(i=0; i<size; i++)
        score += (pix1[i]-pix2[i])*(pix1[i]-pix2[i]);

    return score;
}
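/* WRAPPER8_16_SQ builds the 16x16 version of each 8x8 comparison function by
 * calling it on the four 8x8 quadrants and summing the results. */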
WRAPPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c)
WRAPPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c)
WRAPPER8_16_SQ(dct_sad8x8_c, dct_sad16_c)
#ifdef CONFIG_GPL
WRAPPER8_16_SQ(dct264_sad8x8_c, dct264_sad16_c)
#endif
WRAPPER8_16_SQ(dct_max8x8_c, dct_max16_c)
WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
WRAPPER8_16_SQ(rd8x8_c, rd16_c)
WRAPPER8_16_SQ(bit8x8_c, bit16_c)
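/* Scalar float vector helpers; the pointers registered for these in
 * dsputil_init() below may be overridden by SIMD versions. */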
static void vector_fmul_c(float *dst, const float *src, int len){
    int i;
    for(i=0; i<len; i++)
        dst[i] *= src[i];
}

static void vector_fmul_reverse_c(float *dst, const float *src0, const float *src1, int len){
    int i;
    src1 += len-1;
    for(i=0; i<len; i++)
        dst[i] = src0[i] * src1[-i];
}

void ff_vector_fmul_add_add_c(float *dst, const float *src0, const float *src1, const float *src2, int src3, int len, int step){
    int i;
    for(i=0; i<len; i++)
        dst[i*step] = src0[i] * src1[i] + src2[i] + src3;
}
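/* Float to int16 via bit tricks: callers are expected to have biased their
 * samples by +385.0, so a sample s in [-1.0,1.0) has the IEEE-754 bit
 * pattern 0x43c08000 + s*0x8000 and the low 16 bits already hold the
 * result; out-of-range inputs clamp via the sign of 0x43c0ffff - tmp. */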
void ff_float_to_int16_c(int16_t *dst, const float *src, int len){
    int i;
    for(i=0; i<len; i++) {
        int_fast32_t tmp = ((const int32_t*)src)[i];
        if(tmp & 0xf0000){
            tmp = (0x43c0ffff - tmp)>>31;
            // is this faster on some gcc/cpu combinations?
//          if(tmp > 0x43c0ffff) tmp = 0xFFFF;
//          else                 tmp = 0;
        }
        dst[i] = tmp - 0x8000;
    }
}
#define W0 2048
#define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */
#define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */
#define W3 2408 /* 2048*sqrt (2)*cos (3*pi/16) */
#define W4 2048 /* 2048*sqrt (2)*cos (4*pi/16) */
#define W5 1609 /* 2048*sqrt (2)*cos (5*pi/16) */
#define W6 1108 /* 2048*sqrt (2)*cos (6*pi/16) */
#define W7 565  /* 2048*sqrt (2)*cos (7*pi/16) */
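/* WMV2 inverse DCT: one 8-point pass over the rows, then one over the
 * columns. The W constants are cos(k*pi/16) scaled by 2048*sqrt(2); each
 * pass folds its share of the normalization into the final shifts. */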
static void wmv2_idct_row(short * b)
{
    int s1,s2;
    int a0,a1,a2,a3,a4,a5,a6,a7;
    /*step 1*/
    a1 = W1*b[1]+W7*b[7];
    a7 = W7*b[1]-W1*b[7];
    a5 = W5*b[5]+W3*b[3];
    a3 = W3*b[5]-W5*b[3];
    a2 = W2*b[2]+W6*b[6];
    a6 = W6*b[2]-W2*b[6];
    a0 = W0*b[0]+W0*b[4];
    a4 = W0*b[0]-W0*b[4];
    /*step 2*/
    s1 = (181*(a1-a5+a7-a3)+128)>>8;//1,3,5,7,
    s2 = (181*(a1-a5-a7+a3)+128)>>8;
    /*step 3*/
    b[0] = (a0+a2+a1+a5 + (1<<7))>>8;
    b[1] = (a4+a6 +s1   + (1<<7))>>8;
    b[2] = (a4-a6 +s2   + (1<<7))>>8;
    b[3] = (a0-a2+a7+a3 + (1<<7))>>8;
    b[4] = (a0-a2-a7-a3 + (1<<7))>>8;
    b[5] = (a4-a6 -s2   + (1<<7))>>8;
    b[6] = (a4+a6 -s1   + (1<<7))>>8;
    b[7] = (a0+a2-a1-a5 + (1<<7))>>8;
}

static void wmv2_idct_col(short * b)
{
    int s1,s2;
    int a0,a1,a2,a3,a4,a5,a6,a7;
    /*step 1, with extended precision*/
    a1 = (W1*b[8*1]+W7*b[8*7] + 4)>>3;
    a7 = (W7*b[8*1]-W1*b[8*7] + 4)>>3;
    a5 = (W5*b[8*5]+W3*b[8*3] + 4)>>3;
    a3 = (W3*b[8*5]-W5*b[8*3] + 4)>>3;
    a2 = (W2*b[8*2]+W6*b[8*6] + 4)>>3;
    a6 = (W6*b[8*2]-W2*b[8*6] + 4)>>3;
    a0 = (W0*b[8*0]+W0*b[8*4]    )>>3;
    a4 = (W0*b[8*0]-W0*b[8*4]    )>>3;
    /*step 2*/
    s1 = (181*(a1-a5+a7-a3)+128)>>8;
    s2 = (181*(a1-a5-a7+a3)+128)>>8;
    /*step 3*/
    b[8*0] = (a0+a2+a1+a5 + (1<<13))>>14;
    b[8*1] = (a4+a6 +s1   + (1<<13))>>14;
    b[8*2] = (a4-a6 +s2   + (1<<13))>>14;
    b[8*3] = (a0-a2+a7+a3 + (1<<13))>>14;

    b[8*4] = (a0-a2-a7-a3 + (1<<13))>>14;
    b[8*5] = (a4-a6 -s2   + (1<<13))>>14;
    b[8*6] = (a4+a6 -s1   + (1<<13))>>14;
    b[8*7] = (a0+a2-a1-a5 + (1<<13))>>14;
}

void ff_wmv2_idct_c(short * block){
    int i;

    for(i=0;i<64;i+=8){
        wmv2_idct_row(block+i);
    }
    for(i=0;i<8;i++){
        wmv2_idct_col(block+i);
    }
}

/* XXX: those functions should be suppressed ASAP when all IDCTs are
   converted */
static void ff_wmv2_idct_put_c(uint8_t *dest, int line_size, DCTELEM *block)
{
    ff_wmv2_idct_c(block);
    put_pixels_clamped_c(block, dest, line_size);
}
static void ff_wmv2_idct_add_c(uint8_t *dest, int line_size, DCTELEM *block)
{
    ff_wmv2_idct_c(block);
    add_pixels_clamped_c(block, dest, line_size);
}
static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct (block);
    put_pixels_clamped_c(block, dest, line_size);
}
static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct (block);
    add_pixels_clamped_c(block, dest, line_size);
}

static void ff_jref_idct4_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct4 (block);
    put_pixels_clamped4_c(block, dest, line_size);
}
static void ff_jref_idct4_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct4 (block);
    add_pixels_clamped4_c(block, dest, line_size);
}

static void ff_jref_idct2_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct2 (block);
    put_pixels_clamped2_c(block, dest, line_size);
}
static void ff_jref_idct2_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct2 (block);
    add_pixels_clamped2_c(block, dest, line_size);
}

static void ff_jref_idct1_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;

    dest[0] = cm[(block[0] + 4)>>3];
}
static void ff_jref_idct1_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;

    dest[0] = cm[dest[0] + ((block[0] + 4)>>3)];
}

static void just_return(void *mem av_unused, int stride av_unused, int h av_unused) { return; }
/* init static data */
void dsputil_static_init(void)
{
    int i;

    for(i=0;i<256;i++) ff_cropTbl[i + MAX_NEG_CROP] = i;
    for(i=0;i<MAX_NEG_CROP;i++) {
        ff_cropTbl[i] = 0;
        ff_cropTbl[i + MAX_NEG_CROP + 256] = 255;
    }

    for(i=0;i<512;i++) {
        ff_squareTbl[i] = (i - 256) * (i - 256);
    }

    for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1;
}

int ff_check_alignment(void){
    static int did_fail=0;
    DECLARE_ALIGNED_16(int, aligned);

    if((long)&aligned & 15){
        if(!did_fail){
#if defined(HAVE_MMX) || defined(HAVE_ALTIVEC)
            av_log(NULL, AV_LOG_ERROR,
                "Compiler did not align stack variables. Libavcodec has been miscompiled\n"
                "and may be very slow or crash. This is not a bug in libavcodec,\n"
                "but in the compiler. You may try recompiling using gcc >= 4.2.\n"
                "Do not report crashes to FFmpeg developers.\n");
#endif
            did_fail=1;
        }
        return -1;
    }
    return 0;
}
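/* Fills in the DSPContext function pointers with the C reference
 * implementations, then lets the architecture-specific init functions
 * override whatever they can accelerate. Typical usage (sketch):
 *
 *     DSPContext dsp;
 *     dsputil_static_init();      // once, builds the static tables above
 *     dsputil_init(&dsp, avctx);  // per codec context
 *     dsp.idct_put(dest, line_size, block);
 */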
void dsputil_init(DSPContext* c, AVCodecContext *avctx)
{
    int i;

    ff_check_alignment();

#ifdef CONFIG_ENCODERS
    if(avctx->dct_algo==FF_DCT_FASTINT) {
        c->fdct    = fdct_ifast;
        c->fdct248 = fdct_ifast248;
    }
    else if(avctx->dct_algo==FF_DCT_FAAN) {
        c->fdct    = ff_faandct;
        c->fdct248 = ff_faandct248;
    }
    else {
        c->fdct    = ff_jpeg_fdct_islow; //slow/accurate/default
        c->fdct248 = ff_fdct248_islow;
    }
#endif //CONFIG_ENCODERS
    if(avctx->lowres==1){
        if(avctx->idct_algo==FF_IDCT_INT || avctx->idct_algo==FF_IDCT_AUTO || !ENABLE_H264_DECODER){
            c->idct_put= ff_jref_idct4_put;
            c->idct_add= ff_jref_idct4_add;
        }else{
            c->idct_put= ff_h264_lowres_idct_put_c;
            c->idct_add= ff_h264_lowres_idct_add_c;
        }
        c->idct    = j_rev_dct4;
        c->idct_permutation_type= FF_NO_IDCT_PERM;
    }else if(avctx->lowres==2){
        c->idct_put= ff_jref_idct2_put;
        c->idct_add= ff_jref_idct2_add;
        c->idct    = j_rev_dct2;
        c->idct_permutation_type= FF_NO_IDCT_PERM;
    }else if(avctx->lowres==3){
        c->idct_put= ff_jref_idct1_put;
        c->idct_add= ff_jref_idct1_add;
        c->idct    = j_rev_dct1;
        c->idct_permutation_type= FF_NO_IDCT_PERM;
    }else{
        if(avctx->idct_algo==FF_IDCT_INT){
            c->idct_put= ff_jref_idct_put;
            c->idct_add= ff_jref_idct_add;
            c->idct    = j_rev_dct;
            c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
        }else if((ENABLE_VP3_DECODER || ENABLE_VP5_DECODER || ENABLE_VP6_DECODER || ENABLE_THEORA_DECODER) &&
                avctx->idct_algo==FF_IDCT_VP3){
            c->idct_put= ff_vp3_idct_put_c;
            c->idct_add= ff_vp3_idct_add_c;
            c->idct    = ff_vp3_idct_c;
            c->idct_permutation_type= FF_NO_IDCT_PERM;
        }else if(avctx->idct_algo==FF_IDCT_WMV2){
            c->idct_put= ff_wmv2_idct_put_c;
            c->idct_add= ff_wmv2_idct_add_c;
            c->idct    = ff_wmv2_idct_c;
            c->idct_permutation_type= FF_NO_IDCT_PERM;
        }else if(avctx->idct_algo==FF_IDCT_FAAN){
            c->idct_put= ff_faanidct_put;
            c->idct_add= ff_faanidct_add;
            c->idct    = ff_faanidct;
            c->idct_permutation_type= FF_NO_IDCT_PERM;
        }else{ //accurate/default
            c->idct_put= ff_simple_idct_put;
            c->idct_add= ff_simple_idct_add;
            c->idct    = ff_simple_idct;
            c->idct_permutation_type= FF_NO_IDCT_PERM;
        }
    }
    if (ENABLE_H264_DECODER) {
        c->h264_idct_add= ff_h264_idct_add_c;
        c->h264_idct8_add= ff_h264_idct8_add_c;
        c->h264_idct_dc_add= ff_h264_idct_dc_add_c;
        c->h264_idct8_dc_add= ff_h264_idct8_dc_add_c;
    }

    c->get_pixels = get_pixels_c;
    c->diff_pixels = diff_pixels_c;
    c->put_pixels_clamped = put_pixels_clamped_c;
    c->put_signed_pixels_clamped = put_signed_pixels_clamped_c;
    c->add_pixels_clamped = add_pixels_clamped_c;
    c->add_pixels8 = add_pixels8_c;
    c->add_pixels4 = add_pixels4_c;
    c->sum_abs_dctelem = sum_abs_dctelem_c;
    c->gmc1 = gmc1_c;
    c->gmc = ff_gmc_c;
    c->clear_blocks = clear_blocks_c;
    c->pix_sum = pix_sum_c;
    c->pix_norm1 = pix_norm1_c;
    /* TODO [0] 16  [1] 8 */
    c->pix_abs[0][0] = pix_abs16_c;
    c->pix_abs[0][1] = pix_abs16_x2_c;
    c->pix_abs[0][2] = pix_abs16_y2_c;
    c->pix_abs[0][3] = pix_abs16_xy2_c;
    c->pix_abs[1][0] = pix_abs8_c;
    c->pix_abs[1][1] = pix_abs8_x2_c;
    c->pix_abs[1][2] = pix_abs8_y2_c;
    c->pix_abs[1][3] = pix_abs8_xy2_c;
#define dspfunc(PFX, IDX, NUM) \
    c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## NUM ## _c;     \
    c->PFX ## _pixels_tab[IDX][1] = PFX ## _pixels ## NUM ## _x2_c;  \
    c->PFX ## _pixels_tab[IDX][2] = PFX ## _pixels ## NUM ## _y2_c;  \
    c->PFX ## _pixels_tab[IDX][3] = PFX ## _pixels ## NUM ## _xy2_c

    dspfunc(put, 0, 16);
    dspfunc(put_no_rnd, 0, 16);
    dspfunc(put, 1, 8);
    dspfunc(put_no_rnd, 1, 8);
    dspfunc(put, 2, 4);
    dspfunc(put, 3, 2);

    dspfunc(avg, 0, 16);
    dspfunc(avg_no_rnd, 0, 16);
    dspfunc(avg, 1, 8);
    dspfunc(avg_no_rnd, 1, 8);
    dspfunc(avg, 2, 4);
    dspfunc(avg, 3, 2);
#undef dspfunc
    c->put_no_rnd_pixels_l2[0]= put_no_rnd_pixels16_l2_c;
    c->put_no_rnd_pixels_l2[1]= put_no_rnd_pixels8_l2_c;

    c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c;
    c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c;
    c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c;
    c->put_tpel_pixels_tab[ 4] = put_tpel_pixels_mc01_c;
    c->put_tpel_pixels_tab[ 5] = put_tpel_pixels_mc11_c;
    c->put_tpel_pixels_tab[ 6] = put_tpel_pixels_mc21_c;
    c->put_tpel_pixels_tab[ 8] = put_tpel_pixels_mc02_c;
    c->put_tpel_pixels_tab[ 9] = put_tpel_pixels_mc12_c;
    c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c;

    c->avg_tpel_pixels_tab[ 0] = avg_tpel_pixels_mc00_c;
    c->avg_tpel_pixels_tab[ 1] = avg_tpel_pixels_mc10_c;
    c->avg_tpel_pixels_tab[ 2] = avg_tpel_pixels_mc20_c;
    c->avg_tpel_pixels_tab[ 4] = avg_tpel_pixels_mc01_c;
    c->avg_tpel_pixels_tab[ 5] = avg_tpel_pixels_mc11_c;
    c->avg_tpel_pixels_tab[ 6] = avg_tpel_pixels_mc21_c;
    c->avg_tpel_pixels_tab[ 8] = avg_tpel_pixels_mc02_c;
    c->avg_tpel_pixels_tab[ 9] = avg_tpel_pixels_mc12_c;
    c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c;
#define dspfunc(PFX, IDX, NUM) \
    c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \
    c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \
    c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \
    c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \
    c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \
    c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \
    c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \
    c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \
    c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \
    c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \
    c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
    c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
    c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
    c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
    c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
    c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c

    dspfunc(put_qpel, 0, 16);
    dspfunc(put_no_rnd_qpel, 0, 16);

    dspfunc(avg_qpel, 0, 16);
    /* dspfunc(avg_no_rnd_qpel, 0, 16); */

    dspfunc(put_qpel, 1, 8);
    dspfunc(put_no_rnd_qpel, 1, 8);

    dspfunc(avg_qpel, 1, 8);
    /* dspfunc(avg_no_rnd_qpel, 1, 8); */

    dspfunc(put_h264_qpel, 0, 16);
    dspfunc(put_h264_qpel, 1, 8);
    dspfunc(put_h264_qpel, 2, 4);
    dspfunc(put_h264_qpel, 3, 2);
    dspfunc(avg_h264_qpel, 0, 16);
    dspfunc(avg_h264_qpel, 1, 8);
    dspfunc(avg_h264_qpel, 2, 4);
#undef dspfunc
    c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_c;
    c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_c;
    c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_c;
    c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_c;
    c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_c;
    c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_c;
    c->put_no_rnd_h264_chroma_pixels_tab[0]= put_no_rnd_h264_chroma_mc8_c;

    c->weight_h264_pixels_tab[0]= weight_h264_pixels16x16_c;
    c->weight_h264_pixels_tab[1]= weight_h264_pixels16x8_c;
    c->weight_h264_pixels_tab[2]= weight_h264_pixels8x16_c;
    c->weight_h264_pixels_tab[3]= weight_h264_pixels8x8_c;
    c->weight_h264_pixels_tab[4]= weight_h264_pixels8x4_c;
    c->weight_h264_pixels_tab[5]= weight_h264_pixels4x8_c;
    c->weight_h264_pixels_tab[6]= weight_h264_pixels4x4_c;
    c->weight_h264_pixels_tab[7]= weight_h264_pixels4x2_c;
    c->weight_h264_pixels_tab[8]= weight_h264_pixels2x4_c;
    c->weight_h264_pixels_tab[9]= weight_h264_pixels2x2_c;
    c->biweight_h264_pixels_tab[0]= biweight_h264_pixels16x16_c;
    c->biweight_h264_pixels_tab[1]= biweight_h264_pixels16x8_c;
    c->biweight_h264_pixels_tab[2]= biweight_h264_pixels8x16_c;
    c->biweight_h264_pixels_tab[3]= biweight_h264_pixels8x8_c;
    c->biweight_h264_pixels_tab[4]= biweight_h264_pixels8x4_c;
    c->biweight_h264_pixels_tab[5]= biweight_h264_pixels4x8_c;
    c->biweight_h264_pixels_tab[6]= biweight_h264_pixels4x4_c;
    c->biweight_h264_pixels_tab[7]= biweight_h264_pixels4x2_c;
    c->biweight_h264_pixels_tab[8]= biweight_h264_pixels2x4_c;
    c->biweight_h264_pixels_tab[9]= biweight_h264_pixels2x2_c;
    c->draw_edges = draw_edges_c;
#ifdef CONFIG_CAVS_DECODER
    ff_cavsdsp_init(c,avctx);
#endif
#if defined(CONFIG_VC1_DECODER) || defined(CONFIG_WMV3_DECODER)
    ff_vc1dsp_init(c,avctx);
#endif
#if defined(CONFIG_WMV2_DECODER) || defined(CONFIG_VC1_DECODER) || defined(CONFIG_WMV3_DECODER)
    ff_intrax8dsp_init(c,avctx);
#endif
#if defined(CONFIG_H264_ENCODER)
    ff_h264dspenc_init(c,avctx);
#endif
    c->put_mspel_pixels_tab[0]= put_mspel8_mc00_c;
    c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c;
    c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c;
    c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c;
    c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c;
    c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c;
    c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c;
    c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c;
#define SET_CMP_FUNC(name) \
    c->name[0]= name ## 16_c;\
    c->name[1]= name ## 8x8_c;

    SET_CMP_FUNC(hadamard8_diff)
    c->hadamard8_diff[4]= hadamard8_intra16_c;
    SET_CMP_FUNC(dct_sad)
    SET_CMP_FUNC(dct_max)
#ifdef CONFIG_GPL
    SET_CMP_FUNC(dct264_sad)
#endif
    c->sad[0]= pix_abs16_c;
    c->sad[1]= pix_abs8_c;
    c->sse[0]= sse16_c;
    c->sse[1]= sse8_c;
    c->sse[2]= sse4_c;
    SET_CMP_FUNC(quant_psnr)
    SET_CMP_FUNC(rd)
    SET_CMP_FUNC(bit)
    c->vsad[0]= vsad16_c;
    c->vsad[4]= vsad_intra16_c;
    c->vsse[0]= vsse16_c;
    c->vsse[4]= vsse_intra16_c;
    c->nsse[0]= nsse16_c;
    c->nsse[1]= nsse8_c;
#ifdef CONFIG_SNOW_ENCODER
    c->w53[0]= w53_16_c;
    c->w53[1]= w53_8_c;
    c->w97[0]= w97_16_c;
    c->w97[1]= w97_8_c;
#endif

    c->ssd_int8_vs_int16 = ssd_int8_vs_int16_c;
    c->add_bytes= add_bytes_c;
    c->add_bytes_l2= add_bytes_l2_c;
    c->diff_bytes= diff_bytes_c;
    c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c;
    c->bswap_buf= bswap_buf;
#ifdef CONFIG_PNG_DECODER
    c->add_png_paeth_prediction= ff_add_png_paeth_prediction;
#endif

    c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_c;
    c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_c;
    c->h264_v_loop_filter_chroma= h264_v_loop_filter_chroma_c;
    c->h264_h_loop_filter_chroma= h264_h_loop_filter_chroma_c;
    c->h264_v_loop_filter_chroma_intra= h264_v_loop_filter_chroma_intra_c;
    c->h264_h_loop_filter_chroma_intra= h264_h_loop_filter_chroma_intra_c;
    c->h264_loop_filter_strength= NULL;
    if (ENABLE_ANY_H263) {
        c->h263_h_loop_filter= h263_h_loop_filter_c;
        c->h263_v_loop_filter= h263_v_loop_filter_c;
    }

    c->h261_loop_filter= h261_loop_filter_c;

    c->try_8x8basis= try_8x8basis_c;
    c->add_8x8basis= add_8x8basis_c;

#ifdef CONFIG_SNOW_DECODER
    c->vertical_compose97i = ff_snow_vertical_compose97i;
    c->horizontal_compose97i = ff_snow_horizontal_compose97i;
    c->inner_add_yblock = ff_snow_inner_add_yblock;
#endif

#ifdef CONFIG_VORBIS_DECODER
    c->vorbis_inverse_coupling = vorbis_inverse_coupling;
#endif
#ifdef CONFIG_FLAC_ENCODER
    c->flac_compute_autocorr = ff_flac_compute_autocorr;
#endif
    c->vector_fmul = vector_fmul_c;
    c->vector_fmul_reverse = vector_fmul_reverse_c;
    c->vector_fmul_add_add = ff_vector_fmul_add_add_c;
    c->float_to_int16 = ff_float_to_int16_c;
    c->shrink[0]= ff_img_copy_plane;
    c->shrink[1]= ff_shrink22;
    c->shrink[2]= ff_shrink44;
    c->shrink[3]= ff_shrink88;

    c->prefetch= just_return;

    memset(c->put_2tap_qpel_pixels_tab, 0, sizeof(c->put_2tap_qpel_pixels_tab));
    memset(c->avg_2tap_qpel_pixels_tab, 0, sizeof(c->avg_2tap_qpel_pixels_tab));
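    /* The 2tap tables are zeroed above so that, after the architecture-
     * specific init calls below, any slot no platform filled can fall back
     * to the matching h264 qpel function in the loop that follows. */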
    if (ENABLE_MMX)      dsputil_init_mmx   (c, avctx);
    if (ENABLE_ARMV4L)   dsputil_init_armv4l(c, avctx);
    if (ENABLE_MLIB)     dsputil_init_mlib  (c, avctx);
    if (ENABLE_VIS)      dsputil_init_vis   (c, avctx);
    if (ENABLE_ALPHA)    dsputil_init_alpha (c, avctx);
    if (ENABLE_POWERPC)  dsputil_init_ppc   (c, avctx);
    if (ENABLE_MMI)      dsputil_init_mmi   (c, avctx);
    if (ENABLE_SH4)      dsputil_init_sh4   (c, avctx);
    if (ENABLE_BFIN)     dsputil_init_bfin  (c, avctx);

    for(i=0; i<64; i++){
        if(!c->put_2tap_qpel_pixels_tab[0][i])
            c->put_2tap_qpel_pixels_tab[0][i]= c->put_h264_qpel_pixels_tab[0][i];
        if(!c->avg_2tap_qpel_pixels_tab[0][i])
            c->avg_2tap_qpel_pixels_tab[0][i]= c->avg_h264_qpel_pixels_tab[0][i];
    }
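    /* SIMD IDCTs consume coefficients in a non-raster order; idct_permutation
     * maps the natural coefficient index to the order the selected IDCT
     * expects, so scantables can be permuted once at init time instead of
     * per block. */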
    switch(c->idct_permutation_type){
    case FF_NO_IDCT_PERM:
        for(i=0; i<64; i++)
            c->idct_permutation[i]= i;
        break;
    case FF_LIBMPEG2_IDCT_PERM:
        for(i=0; i<64; i++)
            c->idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
        break;
    case FF_SIMPLE_IDCT_PERM:
        for(i=0; i<64; i++)
            c->idct_permutation[i]= simple_mmx_permutation[i];
        break;
    case FF_TRANSPOSE_IDCT_PERM:
        for(i=0; i<64; i++)
            c->idct_permutation[i]= ((i&7)<<3) | (i>>3);
        break;
    case FF_PARTTRANS_IDCT_PERM:
        for(i=0; i<64; i++)
            c->idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3);
        break;
    case FF_SSE2_IDCT_PERM:
        for(i=0; i<64; i++)
            c->idct_permutation[i]= (i&0x38) | idct_sse2_row_perm[i&7];
        break;
    default:
        av_log(avctx, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n");