/*
 * DSP utils
 * Copyright (c) 2000, 2001 Fabrice Bellard
 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
 *
 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * DSP utils
 */
#include "libavutil/imgutils.h"
#include "libavutil/internal.h"
#include "avcodec.h"
#include "copy_block.h"
#include "dct.h"
#include "dsputil.h"
#include "simple_idct.h"
#include "faandct.h"
#include "faanidct.h"
#include "imgconvert.h"
#include "mathops.h"
#include "mpegvideo.h"
#include "config.h"
uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
uint32_t ff_squareTbl[512] = {0, };
#define BIT_DEPTH 9
#include "dsputil_template.c"
#undef BIT_DEPTH

#define BIT_DEPTH 10
#include "dsputil_template.c"
#undef BIT_DEPTH

#define BIT_DEPTH 8
#include "dsputil_template.c"
// 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f or whatever, depending on the cpu's native arithmetic size
#define pb_7f (~0UL/255 * 0x7f)
#define pb_80 (~0UL/255 * 0x80)
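
/* ~0UL/255 evaluates to 0x0101...01 (one 0x01 per byte of an unsigned long),
 * so pb_7f and pb_80 replicate 0x7f/0x80 into every byte regardless of the
 * native word size. They are used below for SIMD-within-a-register tricks,
 * e.g. in add_bytes_c(). */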

const uint8_t ff_zigzag_direct[64] = {
     0,  1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
};

/* Specific zigzag scan for 248 idct. NOTE that unlike the
   specification, we interleave the fields */
const uint8_t ff_zigzag248_direct[64] = {
     0,  8,  1,  9, 16, 24,  2, 10,
    17, 25, 32, 40, 48, 56, 33, 41,
    18, 26,  3, 11,  4, 12, 19, 27,
    34, 42, 49, 57, 50, 58, 35, 43,
    20, 28,  5, 13,  6, 14, 21, 29,
    36, 44, 51, 59, 52, 60, 37, 45,
    22, 30,  7, 15, 23, 31, 38, 46,
    53, 61, 54, 62, 39, 47, 55, 63,
};

/* not permutated inverse zigzag_direct + 1 for MMX quantizer */
DECLARE_ALIGNED(16, uint16_t, ff_inv_zigzag_direct16)[64];

const uint8_t ff_alternate_horizontal_scan[64] = {
     0,  1,  2,  3,  8,  9, 16, 17,
    10, 11,  4,  5,  6,  7, 15, 14,
    13, 12, 19, 18, 24, 25, 32, 33,
    26, 27, 20, 21, 22, 23, 28, 29,
    30, 31, 34, 35, 40, 41, 48, 49,
    42, 43, 36, 37, 38, 39, 44, 45,
    46, 47, 50, 51, 56, 57, 58, 59,
    52, 53, 54, 55, 60, 61, 62, 63,
};

const uint8_t ff_alternate_vertical_scan[64] = {
     0,  8, 16, 24,  1,  9,  2, 10,
    17, 25, 32, 40, 48, 56, 57, 49,
    41, 33, 26, 18,  3, 11,  4, 12,
    19, 27, 34, 42, 50, 58, 35, 43,
    51, 59, 20, 28,  5, 13,  6, 14,
    21, 29, 36, 44, 52, 60, 37, 45,
    53, 61, 22, 30,  7, 15, 23, 31,
    38, 46, 54, 62, 39, 47, 55, 63,
};

/* Input permutation for the simple_idct_mmx */
static const uint8_t simple_mmx_permutation[64]={
    0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
    0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
    0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
    0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
    0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
    0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
    0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
    0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};

static const uint8_t idct_sse2_row_perm[8] = {0, 4, 1, 5, 2, 6, 3, 7};

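/* Build the effective scan table for a given IDCT input permutation:
 * permutated[i] is the raster position that the i-th coefficient in scan
 * order occupies after applying the IDCT's input permutation, and
 * raster_end[i] is the highest raster index among the first i+1 scan
 * positions, which lets callers bound how much of the block can be
 * nonzero. */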
void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
    int i;
    int end;

    st->scantable= src_scantable;

    for(i=0; i<64; i++){
        int j;
        j = src_scantable[i];
        st->permutated[i] = permutation[j];
    }

    end=-1;
    for(i=0; i<64; i++){
        int j;
        j = st->permutated[i];
        if(j>end) end=j;
        st->raster_end[i]= end;
    }
}

void ff_init_scantable_permutation(uint8_t *idct_permutation,
                                   int idct_permutation_type)
{
    int i;

    switch(idct_permutation_type){
    case FF_NO_IDCT_PERM:
        for(i=0; i<64; i++)
            idct_permutation[i]= i;
        break;
    case FF_LIBMPEG2_IDCT_PERM:
        for(i=0; i<64; i++)
            idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
        break;
    case FF_SIMPLE_IDCT_PERM:
        for(i=0; i<64; i++)
            idct_permutation[i]= simple_mmx_permutation[i];
        break;
    case FF_TRANSPOSE_IDCT_PERM:
        for(i=0; i<64; i++)
            idct_permutation[i]= ((i&7)<<3) | (i>>3);
        break;
    case FF_PARTTRANS_IDCT_PERM:
        for(i=0; i<64; i++)
            idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3);
        break;
    case FF_SSE2_IDCT_PERM:
        for(i=0; i<64; i++)
            idct_permutation[i]= (i&0x38) | idct_sse2_row_perm[i&7];
        break;
    default:
        av_log(NULL, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n");
    }
}

static int pix_sum_c(uint8_t * pix, int line_size)
{
    int s, i, j;

    s = 0;
    for (i = 0; i < 16; i++) {
        for (j = 0; j < 16; j += 8) {
            s += pix[0];
            s += pix[1];
            s += pix[2];
            s += pix[3];
            s += pix[4];
            s += pix[5];
            s += pix[6];
            s += pix[7];
            pix += 8;
        }
        pix += line_size - 16;
    }
    return s;
}

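/* ff_squareTbl is filled at init time so that (ff_squareTbl + 256)[x] == x*x
 * for -256 <= x < 256; offsetting the pointer by 256 lets the routines below
 * index it directly with signed pixel values or differences. */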
static int pix_norm1_c(uint8_t * pix, int line_size)
{
    int s, i, j;
    uint32_t *sq = ff_squareTbl + 256;

    s = 0;
    for (i = 0; i < 16; i++) {
        for (j = 0; j < 16; j += 8) {
#if 0
            s += sq[pix[0]];
            s += sq[pix[1]];
            s += sq[pix[2]];
            s += sq[pix[3]];
            s += sq[pix[4]];
            s += sq[pix[5]];
            s += sq[pix[6]];
            s += sq[pix[7]];
#else
#if HAVE_FAST_64BIT
            register uint64_t x=*(uint64_t*)pix;
            s += sq[x&0xff];
            s += sq[(x>>8)&0xff];
            s += sq[(x>>16)&0xff];
            s += sq[(x>>24)&0xff];
            s += sq[(x>>32)&0xff];
            s += sq[(x>>40)&0xff];
            s += sq[(x>>48)&0xff];
            s += sq[(x>>56)&0xff];
#else
            register uint32_t x=*(uint32_t*)pix;
            s += sq[x&0xff];
            s += sq[(x>>8)&0xff];
            s += sq[(x>>16)&0xff];
            s += sq[(x>>24)&0xff];
            x=*(uint32_t*)(pix+4);
            s += sq[x&0xff];
            s += sq[(x>>8)&0xff];
            s += sq[(x>>16)&0xff];
            s += sq[(x>>24)&0xff];
#endif
#endif
            pix += 8;
        }
        pix += line_size - 16;
    }
    return s;
}

static void bswap_buf(uint32_t *dst, const uint32_t *src, int w){
    int i;

    for(i=0; i+8<=w; i+=8){
        dst[i+0]= av_bswap32(src[i+0]);
        dst[i+1]= av_bswap32(src[i+1]);
        dst[i+2]= av_bswap32(src[i+2]);
        dst[i+3]= av_bswap32(src[i+3]);
        dst[i+4]= av_bswap32(src[i+4]);
        dst[i+5]= av_bswap32(src[i+5]);
        dst[i+6]= av_bswap32(src[i+6]);
        dst[i+7]= av_bswap32(src[i+7]);
    }
    for(;i<w; i++){
        dst[i+0]= av_bswap32(src[i+0]);
    }
}

static void bswap16_buf(uint16_t *dst, const uint16_t *src, int len)
{
    while (len--)
        *dst++ = av_bswap16(*src++);
}

static int sse4_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
{
    int s, i;
    uint32_t *sq = ff_squareTbl + 256;

    s = 0;
    for (i = 0; i < h; i++) {
        s += sq[pix1[0] - pix2[0]];
        s += sq[pix1[1] - pix2[1]];
        s += sq[pix1[2] - pix2[2]];
        s += sq[pix1[3] - pix2[3]];
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}

static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
{
    int s, i;
    uint32_t *sq = ff_squareTbl + 256;

    s = 0;
    for (i = 0; i < h; i++) {
        s += sq[pix1[0] - pix2[0]];
        s += sq[pix1[1] - pix2[1]];
        s += sq[pix1[2] - pix2[2]];
        s += sq[pix1[3] - pix2[3]];
        s += sq[pix1[4] - pix2[4]];
        s += sq[pix1[5] - pix2[5]];
        s += sq[pix1[6] - pix2[6]];
        s += sq[pix1[7] - pix2[7]];
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}

static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s, i;
    uint32_t *sq = ff_squareTbl + 256;

    s = 0;
    for (i = 0; i < h; i++) {
        s += sq[pix1[ 0] - pix2[ 0]];
        s += sq[pix1[ 1] - pix2[ 1]];
        s += sq[pix1[ 2] - pix2[ 2]];
        s += sq[pix1[ 3] - pix2[ 3]];
        s += sq[pix1[ 4] - pix2[ 4]];
        s += sq[pix1[ 5] - pix2[ 5]];
        s += sq[pix1[ 6] - pix2[ 6]];
        s += sq[pix1[ 7] - pix2[ 7]];
        s += sq[pix1[ 8] - pix2[ 8]];
        s += sq[pix1[ 9] - pix2[ 9]];
        s += sq[pix1[10] - pix2[10]];
        s += sq[pix1[11] - pix2[11]];
        s += sq[pix1[12] - pix2[12]];
        s += sq[pix1[13] - pix2[13]];
        s += sq[pix1[14] - pix2[14]];
        s += sq[pix1[15] - pix2[15]];

        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}

static void diff_pixels_c(int16_t *restrict block, const uint8_t *s1,
                          const uint8_t *s2, int stride){
    int i;

    /* read the pixels */
    for(i=0;i<8;i++) {
        block[0] = s1[0] - s2[0];
        block[1] = s1[1] - s2[1];
        block[2] = s1[2] - s2[2];
        block[3] = s1[3] - s2[3];
        block[4] = s1[4] - s2[4];
        block[5] = s1[5] - s2[5];
        block[6] = s1[6] - s2[6];
        block[7] = s1[7] - s2[7];
        s1 += stride;
        s2 += stride;
        block += 8;
    }
}

static void put_pixels_clamped_c(const int16_t *block, uint8_t *restrict pixels,
                                 int line_size)
{
    int i;

    /* read the pixels */
    for(i=0;i<8;i++) {
        pixels[0] = av_clip_uint8(block[0]);
        pixels[1] = av_clip_uint8(block[1]);
        pixels[2] = av_clip_uint8(block[2]);
        pixels[3] = av_clip_uint8(block[3]);
        pixels[4] = av_clip_uint8(block[4]);
        pixels[5] = av_clip_uint8(block[5]);
        pixels[6] = av_clip_uint8(block[6]);
        pixels[7] = av_clip_uint8(block[7]);

        pixels += line_size;
        block += 8;
    }
}

static void put_signed_pixels_clamped_c(const int16_t *block,
                                        uint8_t *restrict pixels,
                                        int line_size)
{
    int i, j;

    for (i = 0; i < 8; i++) {
        for (j = 0; j < 8; j++) {
            if (*block < -128)
                *pixels = 0;
            else if (*block > 127)
                *pixels = 255;
            else
                *pixels = (uint8_t)(*block + 128);
            block++;
            pixels++;
        }
        pixels += (line_size - 8);
    }
}

static void add_pixels8_c(uint8_t *restrict pixels,
                          int16_t *block,
                          int line_size)
{
    int i;

    for(i=0;i<8;i++) {
        pixels[0] += block[0];
        pixels[1] += block[1];
        pixels[2] += block[2];
        pixels[3] += block[3];
        pixels[4] += block[4];
        pixels[5] += block[5];
        pixels[6] += block[6];
        pixels[7] += block[7];
        pixels += line_size;
        block += 8;
    }
}

static void add_pixels_clamped_c(const int16_t *block, uint8_t *restrict pixels,
                                 int line_size)
{
    int i;

    /* read the pixels */
    for(i=0;i<8;i++) {
        pixels[0] = av_clip_uint8(pixels[0] + block[0]);
        pixels[1] = av_clip_uint8(pixels[1] + block[1]);
        pixels[2] = av_clip_uint8(pixels[2] + block[2]);
        pixels[3] = av_clip_uint8(pixels[3] + block[3]);
        pixels[4] = av_clip_uint8(pixels[4] + block[4]);
        pixels[5] = av_clip_uint8(pixels[5] + block[5]);
        pixels[6] = av_clip_uint8(pixels[6] + block[6]);
        pixels[7] = av_clip_uint8(pixels[7] + block[7]);
        pixels += line_size;
        block += 8;
    }
}

static int sum_abs_dctelem_c(int16_t *block)
{
    int sum=0, i;
    for(i=0; i<64; i++)
        sum+= FFABS(block[i]);
    return sum;
}

static void fill_block16_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    int i;

    for (i = 0; i < h; i++) {
        memset(block, value, 16);
        block += line_size;
    }
}

static void fill_block8_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    int i;

    for (i = 0; i < h; i++) {
        memset(block, value, 8);
        block += line_size;
    }
}

#define avg2(a,b) ((a+b+1)>>1)
#define avg4(a,b,c,d) ((a+b+c+d+2)>>2)

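/* One-warp-point GMC: bilinear interpolation with 4.4 fixed-point weights.
 * A+B+C+D == 16*16 == 256, so the weighted sum plus the rounder is
 * renormalized with a shift by 8. */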
static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
{
    const int A=(16-x16)*(16-y16);
    const int B=(   x16)*(16-y16);
    const int C=(16-x16)*(   y16);
    const int D=(   x16)*(   y16);
    int i;

    for(i=0; i<h; i++)
    {
        dst[0]= (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + rounder)>>8;
        dst[1]= (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + rounder)>>8;
        dst[2]= (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + rounder)>>8;
        dst[3]= (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + rounder)>>8;
        dst[4]= (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5] + rounder)>>8;
        dst[5]= (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6] + rounder)>>8;
        dst[6]= (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7] + rounder)>>8;
        dst[7]= (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] + rounder)>>8;
        dst+= stride;
        src+= stride;
    }
}

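/* General global motion compensation: (ox,oy) give the fixed-point source
 * position of the block's top-left sample (the bottom 16 bits are discarded,
 * the next 'shift' bits form the bilinear fraction), (dxx,dyx) is the
 * per-pixel step of the motion field and (dxy,dyy) the per-line step.
 * Samples outside the picture are clamped to the nearest edge. */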
void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
              int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
{
    int y, vx, vy;
    const int s= 1<<shift;

    width--;
    height--;

    for(y=0; y<h; y++){
        int x;

        vx= ox;
        vy= oy;
        for(x=0; x<8; x++){ //XXX FIXME optimize
            int src_x, src_y, frac_x, frac_y, index;

            src_x= vx>>16;
            src_y= vy>>16;
            frac_x= src_x&(s-1);
            frac_y= src_y&(s-1);
            src_x>>=shift;
            src_y>>=shift;

            if((unsigned)src_x < width){
                if((unsigned)src_y < height){
                    index= src_x + src_y*stride;
                    dst[y*stride + x]= ( (  src[index         ]*(s-frac_x)
                                          + src[index       +1]*   frac_x )*(s-frac_y)
                                       + (  src[index+stride  ]*(s-frac_x)
                                          + src[index+stride+1]*   frac_x )*   frac_y
                                       + r)>>(shift*2);
                }else{
                    index= src_x + av_clip(src_y, 0, height)*stride;
                    dst[y*stride + x]= ( (  src[index  ]*(s-frac_x)
                                          + src[index+1]*   frac_x )*s
                                       + r)>>(shift*2);
                }
            }else{
                if((unsigned)src_y < height){
                    index= av_clip(src_x, 0, width) + src_y*stride;
                    dst[y*stride + x]= ( (  src[index       ]*(s-frac_y)
                                          + src[index+stride]*   frac_y )*s
                                       + r)>>(shift*2);
                }else{
                    index= av_clip(src_x, 0, width) + av_clip(src_y, 0, height)*stride;
                    dst[y*stride + x]= src[index];
                }
            }

            vx+= dxx;
            vy+= dyx;
        }
        ox += dxy;
        oy += dyy;
    }
}

static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    switch(width){
    case 2: put_pixels2_8_c (dst, src, stride, height); break;
    case 4: put_pixels4_8_c (dst, src, stride, height); break;
    case 8: put_pixels8_8_c (dst, src, stride, height); break;
    case 16:put_pixels16_8_c(dst, src, stride, height); break;
    }
}

static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (683*(2*src[j] + src[j+1] + 1)) >> 11;
        }
        src += stride;
        dst += stride;
    }
}

static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (683*(src[j] + 2*src[j+1] + 1)) >> 11;
        }
        src += stride;
        dst += stride;
    }
}

static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (683*(2*src[j] + src[j+stride] + 1)) >> 11;
        }
        src += stride;
        dst += stride;
    }
}

static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15;
        }
        src += stride;
        dst += stride;
    }
}

static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
        }
        src += stride;
        dst += stride;
    }
}

static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (683*(src[j] + 2*src[j+stride] + 1)) >> 11;
        }
        src += stride;
        dst += stride;
    }
}

static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
        }
        src += stride;
        dst += stride;
    }
}

static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15;
        }
        src += stride;
        dst += stride;
    }
}

static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    switch(width){
    case 2: avg_pixels2_8_c (dst, src, stride, height); break;
    case 4: avg_pixels4_8_c (dst, src, stride, height); break;
    case 8: avg_pixels8_8_c (dst, src, stride, height); break;
    case 16:avg_pixels16_8_c(dst, src, stride, height); break;
    }
}

static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (dst[j] + ((683*(2*src[j] + src[j+1] + 1)) >> 11) + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}

static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+1] + 1)) >> 11) + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}

static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (dst[j] + ((683*(2*src[j] + src[j+stride] + 1)) >> 11) + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}

static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (dst[j] + ((2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}

static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (dst[j] + ((2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}

static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+stride] + 1)) >> 11) + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}

static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (dst[j] + ((2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}

static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
        for (j=0; j < width; j++) {
            dst[j] = (dst[j] + ((2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}

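/* MPEG-4 quarter-pel interpolation. Half-pel values come from the 8-tap
 * filter (-1, 3, -6, 20, 20, -6, 3, -1)/32, with taps that would fall
 * outside the block folded back onto the edge samples; op_put below adds 16
 * before the >>5 for rounding, while the no_rnd variants add only 15. */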
#define QPEL_MC(r, OPNAME, RND, OP) \
static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<h; i++)\
    {\
        OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\
        OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\
        OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\
        OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\
        OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\
        OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\
        OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\
        OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
\
static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int w=8;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<w; i++)\
    {\
        const int src0= src[0*srcStride];\
        const int src1= src[1*srcStride];\
        const int src2= src[2*srcStride];\
        const int src3= src[3*srcStride];\
        const int src4= src[4*srcStride];\
        const int src5= src[5*srcStride];\
        const int src6= src[6*srcStride];\
        const int src7= src[7*srcStride];\
        const int src8= src[8*srcStride];\
        OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
        OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
        OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
        OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
        OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
        OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
        OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
        dst++;\
        src++;\
    }\
}\
\
static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
\
    for(i=0; i<h; i++)\
    {\
        OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\
        OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\
        OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\
        OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\
        OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\
        OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\
        OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\
        OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\
        OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\
        OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\
        OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\
        OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\
        OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\
        OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\
        OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\
        OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
\
static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    const int w=16;\
    for(i=0; i<w; i++)\
    {\
        const int src0= src[0*srcStride];\
        const int src1= src[1*srcStride];\
        const int src2= src[2*srcStride];\
        const int src3= src[3*srcStride];\
        const int src4= src[4*srcStride];\
        const int src5= src[5*srcStride];\
        const int src6= src[6*srcStride];\
        const int src7= src[7*srcStride];\
        const int src8= src[8*srcStride];\
        const int src9= src[9*srcStride];\
        const int src10= src[10*srcStride];\
        const int src11= src[11*srcStride];\
        const int src12= src[12*srcStride];\
        const int src13= src[13*srcStride];\
        const int src14= src[14*srcStride];\
        const int src15= src[15*srcStride];\
        const int src16= src[16*srcStride];\
        OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
        OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
        OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
        OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
        OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
        OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
        OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
        OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
        OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
        OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
        OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
        OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
        OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
        OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
        OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
        OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
        dst++;\
        src++;\
    }\
}\
\
static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[64];\
    put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
    OPNAME ## pixels8_l2_8(dst, src, half, stride, stride, 8, 8);\
}\
\
static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
}\
\
static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[64];\
    put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
    OPNAME ## pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);\
}\
\
static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t half[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
    OPNAME ## pixels8_l2_8(dst, full, half, stride, 16, 8, 8);\
}\
\
static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    copy_block9(full, src, 16, stride, 9);\
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
}\
\
static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t half[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
    OPNAME ## pixels8_l2_8(dst, full+16, half, stride, 16, 8, 8);\
}\
void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfV[64];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l4_8(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
}\
void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfV[64];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
}\
void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfV[64];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l4_8(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
}\
void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfV[64];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full  , 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l4_8(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t halfH[72];\
    uint8_t halfHV[64];\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t halfH[72];\
    uint8_t halfHV[64];\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
}\
void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfV[64];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
}\
void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfV[64];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
}\
static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t halfH[72];\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
}\
\
static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[256];\
    put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
    OPNAME ## pixels16_l2_8(dst, src, half, stride, stride, 16, 16);\
}\
\
static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
}\
\
static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[256];\
    put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
    OPNAME ## pixels16_l2_8(dst, src+1, half, stride, stride, 16, 16);\
}\
\
static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t half[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
    OPNAME ## pixels16_l2_8(dst, full, half, stride, 24, 16, 16);\
}\
\
static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    copy_block17(full, src, 24, stride, 17);\
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
}\
\
static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t half[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
    OPNAME ## pixels16_l2_8(dst, full+24, half, stride, 24, 16, 16);\
}\
void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfV[256];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l4_8(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
}\
void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfV[256];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
}\
void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfV[256];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l4_8(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
}\
void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfV[256];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full  , 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l4_8(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t halfH[272];\
    uint8_t halfHV[256];\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t halfH[272];\
    uint8_t halfHV[256];\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
}\
void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfV[256];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
}\
void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfV[256];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
}\
static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t halfH[272];\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
}

#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
#define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
#define op_put(a, b) a = cm[((b) + 16)>>5]
#define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]

QPEL_MC(0, put_       , _       , op_put)
QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
QPEL_MC(0, avg_       , _       , op_avg)
//QPEL_MC(1, avg_no_rnd , _       , op_avg)
#undef op_avg
#undef op_avg_no_rnd
#undef op_put
#undef op_put_no_rnd

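/* Naming: qpelN_mcXY computes the NxN prediction at quarter-pel offset
 * (X/4, Y/4). mc00 is the full-pel case and reduces to a plain copy or
 * average, hence the aliases below. */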
#define put_qpel8_mc00_c  ff_put_pixels8x8_c
#define avg_qpel8_mc00_c  ff_avg_pixels8x8_c
#define put_qpel16_mc00_c ff_put_pixels16x16_c
#define avg_qpel16_mc00_c ff_avg_pixels16x16_c
#define put_no_rnd_qpel8_mc00_c  ff_put_pixels8x8_c
#define put_no_rnd_qpel16_mc00_c ff_put_pixels16x16_c

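/* WMV2 half-pel interpolation: a 4-tap (-1, 9, 9, -1)/16 filter with +8
 * rounding, applied horizontally and/or vertically. */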
static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
    int i;

    for(i=0; i<h; i++){
        dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4];
        dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4];
        dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4];
        dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4];
        dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4];
        dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4];
        dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4];
        dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4];
        dst+=dstStride;
        src+=srcStride;
    }
}

#if CONFIG_RV40_DECODER
void ff_put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    put_pixels16_xy2_8_c(dst, src, stride, 16);
}
void ff_avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    avg_pixels16_xy2_8_c(dst, src, stride, 16);
}
void ff_put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    put_pixels8_xy2_8_c(dst, src, stride, 8);
}
void ff_avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    avg_pixels8_xy2_8_c(dst, src, stride, 8);
}
#endif /* CONFIG_RV40_DECODER */

static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
    int i;

    for(i=0; i<w; i++){
        const int src_1= src[ -srcStride];
        const int src0 = src[0          ];
        const int src1 = src[  srcStride];
        const int src2 = src[2*srcStride];
        const int src3 = src[3*srcStride];
        const int src4 = src[4*srcStride];
        const int src5 = src[5*srcStride];
        const int src6 = src[6*srcStride];
        const int src7 = src[7*srcStride];
        const int src8 = src[8*srcStride];
        const int src9 = src[9*srcStride];
        dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
        dst[1*dstStride]= cm[(9*(src1 + src2) - (src0  + src3) + 8)>>4];
        dst[2*dstStride]= cm[(9*(src2 + src3) - (src1  + src4) + 8)>>4];
        dst[3*dstStride]= cm[(9*(src3 + src4) - (src2  + src5) + 8)>>4];
        dst[4*dstStride]= cm[(9*(src4 + src5) - (src3  + src6) + 8)>>4];
        dst[5*dstStride]= cm[(9*(src5 + src6) - (src4  + src7) + 8)>>4];
        dst[6*dstStride]= cm[(9*(src6 + src7) - (src5  + src8) + 8)>>4];
        dst[7*dstStride]= cm[(9*(src7 + src8) - (src6  + src9) + 8)>>4];
        src++;
        dst++;
    }
}

static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t half[64];
    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
    put_pixels8_l2_8(dst, src, half, stride, stride, 8, 8);
}

static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){
    wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
}

static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t half[64];
    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
    put_pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);
}

static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){
    wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
}

static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];
    uint8_t halfV[64];
    uint8_t halfHV[64];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(halfV, src, 8, stride, 8);
    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
    put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);
}
static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];
    uint8_t halfV[64];
    uint8_t halfHV[64];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(halfV, src+1, 8, stride, 8);
    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
    put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);
}
static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(dst, halfH+8, stride, 8, 8);
}

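/* H.263 in-loop deblocking. d measures the step across the block edge;
 * d1 is a ramp that passes small steps through, folds medium ones back and
 * zeroes steps beyond 2*strength, so real edges are preserved. The
 * expression ~(p1>>31) is a branchless clip for values in [-256,511]: it
 * yields 0 for negative values and 255 (after the cast to uint8_t) for
 * values above 255. */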
static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale){
    if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
        int x;
        const int strength= ff_h263_loop_filter_strength[qscale];

        for(x=0; x<8; x++){
            int d1, d2, ad1;
            int p0= src[x-2*stride];
            int p1= src[x-1*stride];
            int p2= src[x+0*stride];
            int p3= src[x+1*stride];
            int d = (p0 - p3 + 4*(p2 - p1)) / 8;

            if     (d<-2*strength) d1= 0;
            else if(d<-  strength) d1=-2*strength - d;
            else if(d<   strength) d1= d;
            else if(d< 2*strength) d1= 2*strength - d;
            else                   d1= 0;

            p1 += d1;
            p2 -= d1;
            if(p1&256) p1= ~(p1>>31);
            if(p2&256) p2= ~(p2>>31);

            src[x-1*stride] = p1;
            src[x+0*stride] = p2;

            ad1= FFABS(d1)>>1;

            d2= av_clip((p0-p3)/4, -ad1, ad1);

            src[x-2*stride] = p0 - d2;
            src[x+  stride] = p3 + d2;
        }
    }
}

static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){
    if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
        int y;
        const int strength= ff_h263_loop_filter_strength[qscale];

        for(y=0; y<8; y++){
            int d1, d2, ad1;
            int p0= src[y*stride-2];
            int p1= src[y*stride-1];
            int p2= src[y*stride+0];
            int p3= src[y*stride+1];
            int d = (p0 - p3 + 4*(p2 - p1)) / 8;

            if     (d<-2*strength) d1= 0;
            else if(d<-  strength) d1=-2*strength - d;
            else if(d<   strength) d1= d;
            else if(d< 2*strength) d1= 2*strength - d;
            else                   d1= 0;

            p1 += d1;
            p2 -= d1;
            if(p1&256) p1= ~(p1>>31);
            if(p2&256) p2= ~(p2>>31);

            src[y*stride-1] = p1;
            src[y*stride+0] = p2;

            ad1= FFABS(d1)>>1;

            d2= av_clip((p0-p3)/4, -ad1, ad1);

            src[y*stride-2] = p0 - d2;
            src[y*stride+1] = p3 + d2;
        }
    }
}

static void h261_loop_filter_c(uint8_t *src, int stride){
    int x,y,xy,yz;
    int temp[64];

    for(x=0; x<8; x++){
        temp[x      ] = 4*src[x           ];
        temp[x + 7*8] = 4*src[x + 7*stride];
    }
    for(y=1; y<7; y++){
        for(x=0; x<8; x++){
            xy = y * stride + x;
            yz = y * 8 + x;
            temp[yz] = src[xy - stride] + 2*src[xy] + src[xy + stride];
        }
    }

    for(y=0; y<8; y++){
        src[  y*stride] = (temp[  y*8] + 2)>>2;
        src[7+y*stride] = (temp[7+y*8] + 2)>>2;
        for(x=1; x<7; x++){
            xy = y * stride + x;
            yz = y * 8 + x;
            src[xy] = (temp[yz-1] + 2*temp[yz] + temp[yz+1] + 8)>>4;
        }
    }
}

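/* Sums of absolute differences for motion estimation. The _x2/_y2/_xy2
 * variants compare against the horizontally, vertically or diagonally
 * half-pel interpolated reference, using the same rounding as the motion
 * compensation (avg2/avg4 above). */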
static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s, i;

    s = 0;
    for(i=0;i<h;i++) {
        s += abs(pix1[0] - pix2[0]);
        s += abs(pix1[1] - pix2[1]);
        s += abs(pix1[2] - pix2[2]);
        s += abs(pix1[3] - pix2[3]);
        s += abs(pix1[4] - pix2[4]);
        s += abs(pix1[5] - pix2[5]);
        s += abs(pix1[6] - pix2[6]);
        s += abs(pix1[7] - pix2[7]);
        s += abs(pix1[8] - pix2[8]);
        s += abs(pix1[9] - pix2[9]);
        s += abs(pix1[10] - pix2[10]);
        s += abs(pix1[11] - pix2[11]);
        s += abs(pix1[12] - pix2[12]);
        s += abs(pix1[13] - pix2[13]);
        s += abs(pix1[14] - pix2[14]);
        s += abs(pix1[15] - pix2[15]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}

static int pix_abs16_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s, i;

    s = 0;
    for(i=0;i<h;i++) {
        s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
        s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
        s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
        s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
        s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
        s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
        s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
        s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
        s += abs(pix1[8] - avg2(pix2[8], pix2[9]));
        s += abs(pix1[9] - avg2(pix2[9], pix2[10]));
        s += abs(pix1[10] - avg2(pix2[10], pix2[11]));
        s += abs(pix1[11] - avg2(pix2[11], pix2[12]));
        s += abs(pix1[12] - avg2(pix2[12], pix2[13]));
        s += abs(pix1[13] - avg2(pix2[13], pix2[14]));
        s += abs(pix1[14] - avg2(pix2[14], pix2[15]));
        s += abs(pix1[15] - avg2(pix2[15], pix2[16]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}

static int pix_abs16_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s, i;
    uint8_t *pix3 = pix2 + line_size;

    s = 0;
    for(i=0;i<h;i++) {
        s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
        s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
        s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
        s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
        s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
        s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
        s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
        s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
        s += abs(pix1[8] - avg2(pix2[8], pix3[8]));
        s += abs(pix1[9] - avg2(pix2[9], pix3[9]));
        s += abs(pix1[10] - avg2(pix2[10], pix3[10]));
        s += abs(pix1[11] - avg2(pix2[11], pix3[11]));
        s += abs(pix1[12] - avg2(pix2[12], pix3[12]));
        s += abs(pix1[13] - avg2(pix2[13], pix3[13]));
        s += abs(pix1[14] - avg2(pix2[14], pix3[14]));
        s += abs(pix1[15] - avg2(pix2[15], pix3[15]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return s;
}

static int pix_abs16_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s, i;
    uint8_t *pix3 = pix2 + line_size;

    s = 0;
    for(i=0;i<h;i++) {
        s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
        s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
        s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
        s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
        s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
        s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
        s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
        s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
        s += abs(pix1[8] - avg4(pix2[8], pix2[9], pix3[8], pix3[9]));
        s += abs(pix1[9] - avg4(pix2[9], pix2[10], pix3[9], pix3[10]));
        s += abs(pix1[10] - avg4(pix2[10], pix2[11], pix3[10], pix3[11]));
        s += abs(pix1[11] - avg4(pix2[11], pix2[12], pix3[11], pix3[12]));
        s += abs(pix1[12] - avg4(pix2[12], pix2[13], pix3[12], pix3[13]));
        s += abs(pix1[13] - avg4(pix2[13], pix2[14], pix3[13], pix3[14]));
        s += abs(pix1[14] - avg4(pix2[14], pix2[15], pix3[14], pix3[15]));
        s += abs(pix1[15] - avg4(pix2[15], pix2[16], pix3[15], pix3[16]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return s;
}
1572 static inline int pix_abs8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
1574 int s, i;
1576 s = 0;
1577 for(i=0;i<h;i++) {
1578 s += abs(pix1[0] - pix2[0]);
1579 s += abs(pix1[1] - pix2[1]);
1580 s += abs(pix1[2] - pix2[2]);
1581 s += abs(pix1[3] - pix2[3]);
1582 s += abs(pix1[4] - pix2[4]);
1583 s += abs(pix1[5] - pix2[5]);
1584 s += abs(pix1[6] - pix2[6]);
1585 s += abs(pix1[7] - pix2[7]);
1586 pix1 += line_size;
1587 pix2 += line_size;
1589 return s;
1592 static int pix_abs8_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
1594 int s, i;
1596 s = 0;
1597 for(i=0;i<h;i++) {
1598 s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
1599 s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
1600 s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
1601 s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
1602 s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
1603 s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
1604 s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
1605 s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
1606 pix1 += line_size;
1607 pix2 += line_size;
1609 return s;
1612 static int pix_abs8_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
1614 int s, i;
1615 uint8_t *pix3 = pix2 + line_size;
1617 s = 0;
1618 for(i=0;i<h;i++) {
1619 s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
1620 s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
1621 s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
1622 s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
1623 s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
1624 s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
1625 s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
1626 s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
1627 pix1 += line_size;
1628 pix2 += line_size;
1629 pix3 += line_size;
1631 return s;
1634 static int pix_abs8_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
1636 int s, i;
1637 uint8_t *pix3 = pix2 + line_size;
1639 s = 0;
1640 for(i=0;i<h;i++) {
1641 s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
1642 s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
1643 s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
1644 s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
1645 s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
1646 s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
1647 s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
1648 s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
1649 pix1 += line_size;
1650 pix2 += line_size;
1651 pix3 += line_size;
1652     }
1653     return s;
1654 }
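/* NOTE (editorial, added): the pix_abs{16,8}[_x2|_y2|_xy2]_c family computes
 * the sum of absolute differences (SAD) between a block and a reference that
 * is offset by half a pixel horizontally (x2), vertically (y2) or both
 * (xy2); avg2()/avg4() perform the rounding half-pel interpolation inline.
 * A hedged usage sketch, assuming a DSPContext c already filled in by
 * ff_dsputil_init() and valid pixel pointers cur/ref:
 *
 *     // SAD of a 16x16 block against the (x+0.5, y) half-pel reference
 *     int sad = c.pix_abs[0][1](NULL, cur, ref, stride, 16);
 *
 * First index: block width (0 = 16, 1 = 8); second index: half-pel case
 * (0 = integer, 1 = x2, 2 = y2, 3 = xy2), matching the table set up below.
 */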
1656 static int nsse16_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
1657 MpegEncContext *c = v;
1658 int score1=0;
1659 int score2=0;
1660 int x,y;
1662 for(y=0; y<h; y++){
1663 for(x=0; x<16; x++){
1664 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
1665     }
1666 if(y+1<h){
1667 for(x=0; x<15; x++){
1668 score2+= FFABS( s1[x ] - s1[x +stride]
1669 - s1[x+1] + s1[x+1+stride])
1670 -FFABS( s2[x ] - s2[x +stride]
1671 - s2[x+1] + s2[x+1+stride]);
1672         }
1673     }
1674 s1+= stride;
1675 s2+= stride;
1676     }
1678 if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
1679 else return score1 + FFABS(score2)*8;
1680 }
1682 static int nsse8_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
1683 MpegEncContext *c = v;
1684 int score1=0;
1685 int score2=0;
1686 int x,y;
1688 for(y=0; y<h; y++){
1689 for(x=0; x<8; x++){
1690 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
1691     }
1692 if(y+1<h){
1693 for(x=0; x<7; x++){
1694 score2+= FFABS( s1[x ] - s1[x +stride]
1695 - s1[x+1] + s1[x+1+stride])
1696 -FFABS( s2[x ] - s2[x +stride]
1697 - s2[x+1] + s2[x+1+stride]);
1698         }
1699     }
1700 s1+= stride;
1701 s2+= stride;
1702     }
1704 if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
1705 else return score1 + FFABS(score2)*8;
1706 }
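/* NOTE (editorial, added): nsse16_c/nsse8_c implement the "noise shaping"
 * SSE comparison: score1 is the plain sum of squared errors, while score2
 * accumulates how much the local 2x2 gradient structure of the two blocks
 * differs. The result score1 + |score2| * nsse_weight (weight 8 when no
 * context is available) penalizes errors that destroy texture more heavily
 * than errors that merely add similarly structured noise.
 */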
1708 static int try_8x8basis_c(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){
1709 int i;
1710 unsigned int sum=0;
1712 for(i=0; i<8*8; i++){
1713 int b= rem[i] + ((basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT));
1714 int w= weight[i];
1715 b>>= RECON_SHIFT;
1716 assert(-512<b && b<512);
1718 sum += (w*b)*(w*b)>>4;
1719     }
1720     return sum>>2;
1721 }
1723 static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale){
1724 int i;
1726 for(i=0; i<8*8; i++){
1727 rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT);
1728     }
1729 }
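/* NOTE (editorial, added): try_8x8basis_c/add_8x8basis_c evaluate and apply
 * one scaled DCT basis function against a residual block in fixed point.
 * (basis[i]*scale + (1<<(BASIS_SHIFT-RECON_SHIFT-1))) >> (BASIS_SHIFT-RECON_SHIFT)
 * is a rounded right shift: assuming, for illustration, that
 * BASIS_SHIFT-RECON_SHIFT == 10, it adds 512 before shifting by 10, so the
 * scaled product is rounded to nearest rather than truncated toward zero.
 */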
1731 static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){
1732 return 0;
1733 }
1735 void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){
1736 int i;
1738 memset(cmp, 0, sizeof(void*)*6);
1740 for(i=0; i<6; i++){
1741 switch(type&0xFF){
1742 case FF_CMP_SAD:
1743 cmp[i]= c->sad[i];
1744 break;
1745 case FF_CMP_SATD:
1746 cmp[i]= c->hadamard8_diff[i];
1747 break;
1748 case FF_CMP_SSE:
1749 cmp[i]= c->sse[i];
1750 break;
1751 case FF_CMP_DCT:
1752 cmp[i]= c->dct_sad[i];
1753 break;
1754 case FF_CMP_DCT264:
1755 cmp[i]= c->dct264_sad[i];
1756 break;
1757 case FF_CMP_DCTMAX:
1758 cmp[i]= c->dct_max[i];
1759 break;
1760 case FF_CMP_PSNR:
1761 cmp[i]= c->quant_psnr[i];
1762 break;
1763 case FF_CMP_BIT:
1764 cmp[i]= c->bit[i];
1765 break;
1766 case FF_CMP_RD:
1767 cmp[i]= c->rd[i];
1768 break;
1769 case FF_CMP_VSAD:
1770 cmp[i]= c->vsad[i];
1771 break;
1772 case FF_CMP_VSSE:
1773 cmp[i]= c->vsse[i];
1774 break;
1775 case FF_CMP_ZERO:
1776 cmp[i]= zero_cmp;
1777 break;
1778 case FF_CMP_NSSE:
1779 cmp[i]= c->nsse[i];
1780 break;
1781 default:
1782 av_log(NULL, AV_LOG_ERROR,"internal error in cmp function selection\n");
1783         }
1784     }
1785 }
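/* NOTE (editorial, added): ff_set_cmp() resolves the FF_CMP_* selector in
 * the low byte of `type` into six size-specific function pointers used by
 * motion estimation. A hedged usage sketch:
 *
 *     me_cmp_func sad[6];
 *     ff_set_cmp(&s->dsp, sad, FF_CMP_SAD); // sad[0]: 16x16, sad[1]: 8x8, ...
 *
 * Note that the default branch only logs an error: the cmp[] slots keep the
 * NULL from the memset above, so an unknown selector fails on first call.
 */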
1787 static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){
1788 long i;
1789 for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
1790 long a = *(long*)(src+i);
1791 long b = *(long*)(dst+i);
1792 *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80);
1793     }
1794 for(; i<w; i++)
1795 dst[i+0] += src[i+0];
1796 }
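/* NOTE (editorial, added): add_bytes_c() adds all byte lanes of a machine
 * word in parallel (SWAR). Masking both operands with pb_7f keeps each
 * 7-bit partial sum from carrying into the next lane, and xor-ing with
 * (a^b)&pb_80 restores the top bit of every lane. One-byte example:
 * a=0x90, b=0x23: (0x10+0x23) ^ ((0x90^0x23)&0x80) = 0x33 ^ 0x80 = 0xb3,
 * which equals (0x90+0x23) & 0xff.
 */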
1798 static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
1799 long i;
1800 #if !HAVE_FAST_UNALIGNED
1801 if((long)src2 & (sizeof(long)-1)){
1802 for(i=0; i+7<w; i+=8){
1803 dst[i+0] = src1[i+0]-src2[i+0];
1804 dst[i+1] = src1[i+1]-src2[i+1];
1805 dst[i+2] = src1[i+2]-src2[i+2];
1806 dst[i+3] = src1[i+3]-src2[i+3];
1807 dst[i+4] = src1[i+4]-src2[i+4];
1808 dst[i+5] = src1[i+5]-src2[i+5];
1809 dst[i+6] = src1[i+6]-src2[i+6];
1810 dst[i+7] = src1[i+7]-src2[i+7];
1811     }
1812 }else
1813 #endif
1814 for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
1815 long a = *(long*)(src1+i);
1816 long b = *(long*)(src2+i);
1817 *(long*)(dst+i) = ((a|pb_80) - (b&pb_7f)) ^ ((a^b^pb_80)&pb_80);
1818     }
1819 for(; i<w; i++)
1820 dst[i+0] = src1[i+0]-src2[i+0];
1821 }
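/* NOTE (editorial, added): diff_bytes_c() is the SWAR subtraction
 * counterpart: (a|pb_80) - (b&pb_7f) subtracts each lane with the top bit
 * pre-set as a borrow guard, and the final xor with (a^b^pb_80)&pb_80
 * corrects that bit, yielding per-byte modulo-256 differences. Example:
 * a=0x10, b=0x20: (0x90-0x20) ^ ((0x10^0x20^0x80)&0x80) = 0x70^0x80 = 0xf0,
 * which equals (0x10-0x20) & 0xff.
 */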
1823 static void add_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *diff, int w, int *left, int *left_top){
1824 int i;
1825 uint8_t l, lt;
1827 l= *left;
1828 lt= *left_top;
1830 for(i=0; i<w; i++){
1831 l= mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF) + diff[i];
1832 lt= src1[i];
1833 dst[i]= l;
1834     }
1836 *left= l;
1837 *left_top= lt;
1838 }
1840 static void sub_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w, int *left, int *left_top){
1841 int i;
1842 uint8_t l, lt;
1844 l= *left;
1845 lt= *left_top;
1847 for(i=0; i<w; i++){
1848 const int pred= mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF);
1849 lt= src1[i];
1850 l= src2[i];
1851 dst[i]= l - pred;
1852     }
1854 *left= l;
1855 *left_top= lt;
1856 }
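/* NOTE (editorial, added): these two functions implement the HuffYUV median
 * predictor: pred = mid_pred(left, top, left + top - topleft), the median
 * of the left neighbour, the top neighbour and the gradient estimate.
 * Worked example: left=10, top=12, topleft=11 gives gradient 10+12-11=11,
 * and the median of (10, 12, 11) is 11. add_* rebuilds pixels from stored
 * residuals, sub_* produces them; *left/*left_top carry state across calls.
 */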
1858 static int add_hfyu_left_prediction_c(uint8_t *dst, const uint8_t *src, int w, int acc){
1859 int i;
1861 for(i=0; i<w-1; i++){
1862 acc+= src[i];
1863 dst[i]= acc;
1864 i++;
1865 acc+= src[i];
1866 dst[i]= acc;
1867     }
1869 for(; i<w; i++){
1870 acc+= src[i];
1871 dst[i]= acc;
1872     }
1874 return acc;
1875 }
1877 #if HAVE_BIGENDIAN
1878 #define B 3
1879 #define G 2
1880 #define R 1
1881 #define A 0
1882 #else
1883 #define B 0
1884 #define G 1
1885 #define R 2
1886 #define A 3
1887 #endif
1888 static void add_hfyu_left_prediction_bgr32_c(uint8_t *dst, const uint8_t *src, int w, int *red, int *green, int *blue, int *alpha){
1889 int i;
1890 int r,g,b,a;
1891 r= *red;
1892 g= *green;
1893 b= *blue;
1894 a= *alpha;
1896 for(i=0; i<w; i++){
1897 b+= src[4*i+B];
1898 g+= src[4*i+G];
1899 r+= src[4*i+R];
1900 a+= src[4*i+A];
1902 dst[4*i+B]= b;
1903 dst[4*i+G]= g;
1904 dst[4*i+R]= r;
1905 dst[4*i+A]= a;
1906     }
1908 *red= r;
1909 *green= g;
1910 *blue= b;
1911 *alpha= a;
1912 }
1913 #undef B
1914 #undef G
1915 #undef R
1916 #undef A
1918 #define BUTTERFLY2(o1,o2,i1,i2) \
1919 o1= (i1)+(i2);\
1920 o2= (i1)-(i2);
1922 #define BUTTERFLY1(x,y) \
1923 {\
1924 int a,b;\
1925 a= x;\
1926 b= y;\
1927 x= a+b;\
1928 y= a-b;\
1929 }
1931 #define BUTTERFLYA(x,y) (FFABS((x)+(y)) + FFABS((x)-(y)))
1933 static int hadamard8_diff8x8_c(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){
1934 int i;
1935 int temp[64];
1936 int sum=0;
1938 assert(h==8);
1940 for(i=0; i<8; i++){
1941 //FIXME try pointer walks
1942 BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0]-dst[stride*i+0],src[stride*i+1]-dst[stride*i+1]);
1943 BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2]-dst[stride*i+2],src[stride*i+3]-dst[stride*i+3]);
1944 BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4]-dst[stride*i+4],src[stride*i+5]-dst[stride*i+5]);
1945 BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6]-dst[stride*i+6],src[stride*i+7]-dst[stride*i+7]);
1947 BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
1948 BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
1949 BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
1950 BUTTERFLY1(temp[8*i+5], temp[8*i+7]);
1952 BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
1953 BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
1954 BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
1955 BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
1956     }
1958 for(i=0; i<8; i++){
1959 BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
1960 BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
1961 BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
1962 BUTTERFLY1(temp[8*6+i], temp[8*7+i]);
1964 BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
1965 BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
1966 BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
1967 BUTTERFLY1(temp[8*5+i], temp[8*7+i]);
1969 sum +=
1970 BUTTERFLYA(temp[8*0+i], temp[8*4+i])
1971 +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
1972 +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
1973 +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
1974     }
1975     return sum;
1976 }
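/* NOTE (editorial, added): hadamard8_diff8x8_c is the SATD metric: an 8x8
 * Hadamard transform of the difference block whose absolute coefficients
 * are summed. The first loop is the horizontal 1-D pass (three butterfly
 * stages per row), the second loop the vertical pass; the last vertical
 * stage is fused into BUTTERFLYA, which sums |x+y| + |x-y| directly
 * instead of storing the final stage.
 */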
1978 static int hadamard8_intra8x8_c(/*MpegEncContext*/ void *s, uint8_t *src, uint8_t *dummy, int stride, int h){
1979 int i;
1980 int temp[64];
1981 int sum=0;
1983 assert(h==8);
1985 for(i=0; i<8; i++){
1986 //FIXME try pointer walks
1987 BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0],src[stride*i+1]);
1988 BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2],src[stride*i+3]);
1989 BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4],src[stride*i+5]);
1990 BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6],src[stride*i+7]);
1992 BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
1993 BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
1994 BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
1995 BUTTERFLY1(temp[8*i+5], temp[8*i+7]);
1997 BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
1998 BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
1999 BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
2000 BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
2001     }
2003 for(i=0; i<8; i++){
2004 BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
2005 BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
2006 BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
2007 BUTTERFLY1(temp[8*6+i], temp[8*7+i]);
2009 BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
2010 BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
2011 BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
2012 BUTTERFLY1(temp[8*5+i], temp[8*7+i]);
2014 sum +=
2015 BUTTERFLYA(temp[8*0+i], temp[8*4+i])
2016 +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
2017 +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
2018 +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
2019     }
2021 sum -= FFABS(temp[8*0] + temp[8*4]); // -mean
2023 return sum;
2024 }
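/* NOTE (editorial, added): the intra variant transforms the source block
 * itself rather than a difference, then subtracts FFABS(temp[8*0] +
 * temp[8*4]), which at that point equals the block's DC term, so a
 * perfectly flat block scores 0 and the metric measures AC energy only.
 */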
2026 static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2027 MpegEncContext * const s= (MpegEncContext *)c;
2028 LOCAL_ALIGNED_16(int16_t, temp, [64]);
2030 assert(h==8);
2032 s->dsp.diff_pixels(temp, src1, src2, stride);
2033 s->dsp.fdct(temp);
2034 return s->dsp.sum_abs_dctelem(temp);
2035 }
2037 #if CONFIG_GPL
2038 #define DCT8_1D {\
2039 const int s07 = SRC(0) + SRC(7);\
2040 const int s16 = SRC(1) + SRC(6);\
2041 const int s25 = SRC(2) + SRC(5);\
2042 const int s34 = SRC(3) + SRC(4);\
2043 const int a0 = s07 + s34;\
2044 const int a1 = s16 + s25;\
2045 const int a2 = s07 - s34;\
2046 const int a3 = s16 - s25;\
2047 const int d07 = SRC(0) - SRC(7);\
2048 const int d16 = SRC(1) - SRC(6);\
2049 const int d25 = SRC(2) - SRC(5);\
2050 const int d34 = SRC(3) - SRC(4);\
2051 const int a4 = d16 + d25 + (d07 + (d07>>1));\
2052 const int a5 = d07 - d34 - (d25 + (d25>>1));\
2053 const int a6 = d07 + d34 - (d16 + (d16>>1));\
2054 const int a7 = d16 - d25 + (d34 + (d34>>1));\
2055 DST(0, a0 + a1 ) ;\
2056 DST(1, a4 + (a7>>2)) ;\
2057 DST(2, a2 + (a3>>1)) ;\
2058 DST(3, a5 + (a6>>2)) ;\
2059 DST(4, a0 - a1 ) ;\
2060 DST(5, a6 - (a5>>2)) ;\
2061 DST(6, (a2>>1) - a3 ) ;\
2062 DST(7, (a4>>2) - a7 ) ;\
2063 }
2065 static int dct264_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2066 MpegEncContext * const s= (MpegEncContext *)c;
2067 int16_t dct[8][8];
2068 int i;
2069 int sum=0;
2071 s->dsp.diff_pixels(dct[0], src1, src2, stride);
2073 #define SRC(x) dct[i][x]
2074 #define DST(x,v) dct[i][x]= v
2075 for( i = 0; i < 8; i++ )
2076 DCT8_1D
2077 #undef SRC
2078 #undef DST
2080 #define SRC(x) dct[x][i]
2081 #define DST(x,v) sum += FFABS(v)
2082 for( i = 0; i < 8; i++ )
2083 DCT8_1D
2084 #undef SRC
2085 #undef DST
2086 return sum;
2087 }
2088 #endif
2090 static int dct_max8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2091 MpegEncContext * const s= (MpegEncContext *)c;
2092 LOCAL_ALIGNED_16(int16_t, temp, [64]);
2093 int sum=0, i;
2095 assert(h==8);
2097 s->dsp.diff_pixels(temp, src1, src2, stride);
2098 s->dsp.fdct(temp);
2100 for(i=0; i<64; i++)
2101 sum= FFMAX(sum, FFABS(temp[i]));
2103 return sum;
2104 }
2106 static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2107 MpegEncContext * const s= (MpegEncContext *)c;
2108 LOCAL_ALIGNED_16(int16_t, temp, [64*2]);
2109 int16_t * const bak = temp+64;
2110 int sum=0, i;
2112 assert(h==8);
2113 s->mb_intra=0;
2115 s->dsp.diff_pixels(temp, src1, src2, stride);
2117 memcpy(bak, temp, 64*sizeof(int16_t));
2119 s->block_last_index[0/*FIXME*/]= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
2120 s->dct_unquantize_inter(s, temp, 0, s->qscale);
2121 ff_simple_idct_8(temp); //FIXME
2123 for(i=0; i<64; i++)
2124 sum+= (temp[i]-bak[i])*(temp[i]-bak[i]);
2126 return sum;
2127 }
2129 static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2130 MpegEncContext * const s= (MpegEncContext *)c;
2131 const uint8_t *scantable= s->intra_scantable.permutated;
2132 LOCAL_ALIGNED_16(int16_t, temp, [64]);
2133 LOCAL_ALIGNED_16(uint8_t, lsrc1, [64]);
2134 LOCAL_ALIGNED_16(uint8_t, lsrc2, [64]);
2135 int i, last, run, bits, level, distortion, start_i;
2136 const int esc_length= s->ac_esc_length;
2137 uint8_t * length;
2138 uint8_t * last_length;
2140 assert(h==8);
2142 copy_block8(lsrc1, src1, 8, stride, 8);
2143 copy_block8(lsrc2, src2, 8, stride, 8);
2145 s->dsp.diff_pixels(temp, lsrc1, lsrc2, 8);
2147 s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
2149 bits=0;
2151 if (s->mb_intra) {
2152 start_i = 1;
2153 length = s->intra_ac_vlc_length;
2154 last_length= s->intra_ac_vlc_last_length;
2155 bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
2156 } else {
2157 start_i = 0;
2158 length = s->inter_ac_vlc_length;
2159 last_length= s->inter_ac_vlc_last_length;
2160     }
2162 if(last>=start_i){
2163 run=0;
2164 for(i=start_i; i<last; i++){
2165 int j= scantable[i];
2166 level= temp[j];
2168 if(level){
2169 level+=64;
2170 if((level&(~127)) == 0){
2171 bits+= length[UNI_AC_ENC_INDEX(run, level)];
2172 }else
2173 bits+= esc_length;
2174 run=0;
2175 }else
2176 run++;
2177     }
2178 i= scantable[last];
2180 level= temp[i] + 64;
2182 assert(level - 64);
2184 if((level&(~127)) == 0){
2185 bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
2186 }else
2187 bits+= esc_length;
2188     }
2191 if(last>=0){
2192 if(s->mb_intra)
2193 s->dct_unquantize_intra(s, temp, 0, s->qscale);
2194 else
2195 s->dct_unquantize_inter(s, temp, 0, s->qscale);
2196     }
2198 s->dsp.idct_add(lsrc2, 8, temp);
2200 distortion= s->dsp.sse[1](NULL, lsrc2, lsrc1, 8, 8);
2202 return distortion + ((bits*s->qscale*s->qscale*109 + 64)>>7);
2203 }
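/* NOTE (editorial, added): rd8x8_c returns a real rate-distortion cost:
 * the SSE of the quantize -> dequantize -> IDCT round trip plus the bit
 * cost weighted by (qscale^2 * 109 + 64) >> 7, i.e. an effective lambda of
 * about 0.85 * qscale^2 (109/128 ~ 0.85), the +64 rounding the shift.
 */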
2205 static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2206 MpegEncContext * const s= (MpegEncContext *)c;
2207 const uint8_t *scantable= s->intra_scantable.permutated;
2208 LOCAL_ALIGNED_16(int16_t, temp, [64]);
2209 int i, last, run, bits, level, start_i;
2210 const int esc_length= s->ac_esc_length;
2211 uint8_t * length;
2212 uint8_t * last_length;
2214 assert(h==8);
2216 s->dsp.diff_pixels(temp, src1, src2, stride);
2218 s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
2220 bits=0;
2222 if (s->mb_intra) {
2223 start_i = 1;
2224 length = s->intra_ac_vlc_length;
2225 last_length= s->intra_ac_vlc_last_length;
2226 bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
2227 } else {
2228 start_i = 0;
2229 length = s->inter_ac_vlc_length;
2230 last_length= s->inter_ac_vlc_last_length;
2231     }
2233 if(last>=start_i){
2234 run=0;
2235 for(i=start_i; i<last; i++){
2236 int j= scantable[i];
2237 level= temp[j];
2239 if(level){
2240 level+=64;
2241 if((level&(~127)) == 0){
2242 bits+= length[UNI_AC_ENC_INDEX(run, level)];
2243 }else
2244 bits+= esc_length;
2245 run=0;
2246 }else
2247 run++;
2248     }
2249 i= scantable[last];
2251 level= temp[i] + 64;
2253 assert(level - 64);
2255 if((level&(~127)) == 0){
2256 bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
2257 }else
2258 bits+= esc_length;
2259     }
2261 return bits;
2262 }
2264 #define VSAD_INTRA(size) \
2265 static int vsad_intra##size##_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
2266 int score=0; \
2267 int x,y; \
2269 for(y=1; y<h; y++){ \
2270 for(x=0; x<size; x+=4){ \
2271 score+= FFABS(s[x ] - s[x +stride]) + FFABS(s[x+1] - s[x+1+stride]) \
2272 +FFABS(s[x+2] - s[x+2+stride]) + FFABS(s[x+3] - s[x+3+stride]); \
2273         } \
2274 s+= stride; \
2275     } \
2277 return score; \
2278 }
2279 VSAD_INTRA(8)
2280 VSAD_INTRA(16)
2282 static int vsad16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
2283 int score=0;
2284 int x,y;
2286 for(y=1; y<h; y++){
2287 for(x=0; x<16; x++){
2288 score+= FFABS(s1[x ] - s2[x ] - s1[x +stride] + s2[x +stride]);
2289         }
2290 s1+= stride;
2291 s2+= stride;
2292     }
2294 return score;
2295 }
2297 #define SQ(a) ((a)*(a))
2298 #define VSSE_INTRA(size) \
2299 static int vsse_intra##size##_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
2300 int score=0; \
2301 int x,y; \
2303 for(y=1; y<h; y++){ \
2304 for(x=0; x<size; x+=4){ \
2305 score+= SQ(s[x ] - s[x +stride]) + SQ(s[x+1] - s[x+1+stride]) \
2306 +SQ(s[x+2] - s[x+2+stride]) + SQ(s[x+3] - s[x+3+stride]); \
2307         } \
2308 s+= stride; \
2309     } \
2311 return score; \
2312 }
2313 VSSE_INTRA(8)
2314 VSSE_INTRA(16)
2316 static int vsse16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
2317 int score=0;
2318 int x,y;
2320 for(y=1; y<h; y++){
2321 for(x=0; x<16; x++){
2322 score+= SQ(s1[x ] - s2[x ] - s1[x +stride] + s2[x +stride]);
2323         }
2324 s1+= stride;
2325 s2+= stride;
2326     }
2328 return score;
2329 }
2331 static int ssd_int8_vs_int16_c(const int8_t *pix1, const int16_t *pix2,
2332 int size){
2333 int score=0;
2334 int i;
2335 for(i=0; i<size; i++)
2336 score += (pix1[i]-pix2[i])*(pix1[i]-pix2[i]);
2337 return score;
2338 }
2340 #define WRAPPER8_16_SQ(name8, name16)\
2341 static int name16(void /*MpegEncContext*/ *s, uint8_t *dst, uint8_t *src, int stride, int h){\
2342 int score=0;\
2343 score +=name8(s, dst , src , stride, 8);\
2344 score +=name8(s, dst+8 , src+8 , stride, 8);\
2345 if(h==16){\
2346 dst += 8*stride;\
2347 src += 8*stride;\
2348 score +=name8(s, dst , src , stride, 8);\
2349 score +=name8(s, dst+8 , src+8 , stride, 8);\
2350     }\
2351 return score;\
2352 }
2354 WRAPPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c)
2355 WRAPPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c)
2356 WRAPPER8_16_SQ(dct_sad8x8_c, dct_sad16_c)
2357 #if CONFIG_GPL
2358 WRAPPER8_16_SQ(dct264_sad8x8_c, dct264_sad16_c)
2359 #endif
2360 WRAPPER8_16_SQ(dct_max8x8_c, dct_max16_c)
2361 WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
2362 WRAPPER8_16_SQ(rd8x8_c, rd16_c)
2363 WRAPPER8_16_SQ(bit8x8_c, bit16_c)
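/* NOTE (editorial, added): WRAPPER8_16_SQ builds each 16x16 comparison from
 * its 8x8 kernel by scoring the four quadrants and summing, roughly:
 *
 *     score = name8(s, dst,   src,   stride, 8)   // top-left 8x8
 *           + name8(s, dst+8, src+8, stride, 8);  // top-right 8x8
 *     // plus the two bottom quadrants when h == 16
 *
 * so an h==8 caller gets only the top half scored.
 */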
2365 static inline uint32_t clipf_c_one(uint32_t a, uint32_t mini,
2366 uint32_t maxi, uint32_t maxisign)
2367 {
2369 if(a > mini) return mini;
2370 else if((a^(1U<<31)) > maxisign) return maxi;
2371 else return a;
2372 }
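/* NOTE (editorial, added): clipf_c_one clips a float through its raw
 * IEEE-754 bit pattern, valid only when min < 0 < max (enforced by
 * vector_clipf_c below). Negative floats have the sign bit set, so as
 * unsigned integers they sort above every positive pattern and in reverse
 * magnitude order: a > mini means "more negative than min", hence clamp to
 * min. Flipping the sign bit (a ^ (1U<<31)) then lets the positive range be
 * compared against maxisign for the upper clamp, with no float compares.
 */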
2374 static void vector_clipf_c_opposite_sign(float *dst, const float *src, float *min, float *max, int len){
2375 int i;
2376 uint32_t mini = *(uint32_t*)min;
2377 uint32_t maxi = *(uint32_t*)max;
2378 uint32_t maxisign = maxi ^ (1U<<31);
2379 uint32_t *dsti = (uint32_t*)dst;
2380 const uint32_t *srci = (const uint32_t*)src;
2381 for(i=0; i<len; i+=8) {
2382 dsti[i + 0] = clipf_c_one(srci[i + 0], mini, maxi, maxisign);
2383 dsti[i + 1] = clipf_c_one(srci[i + 1], mini, maxi, maxisign);
2384 dsti[i + 2] = clipf_c_one(srci[i + 2], mini, maxi, maxisign);
2385 dsti[i + 3] = clipf_c_one(srci[i + 3], mini, maxi, maxisign);
2386 dsti[i + 4] = clipf_c_one(srci[i + 4], mini, maxi, maxisign);
2387 dsti[i + 5] = clipf_c_one(srci[i + 5], mini, maxi, maxisign);
2388 dsti[i + 6] = clipf_c_one(srci[i + 6], mini, maxi, maxisign);
2389 dsti[i + 7] = clipf_c_one(srci[i + 7], mini, maxi, maxisign);
2390     }
2391 }
2392 static void vector_clipf_c(float *dst, const float *src, float min, float max, int len){
2393 int i;
2394 if(min < 0 && max > 0) {
2395 vector_clipf_c_opposite_sign(dst, src, &min, &max, len);
2396 } else {
2397 for(i=0; i < len; i+=8) {
2398 dst[i ] = av_clipf(src[i ], min, max);
2399 dst[i + 1] = av_clipf(src[i + 1], min, max);
2400 dst[i + 2] = av_clipf(src[i + 2], min, max);
2401 dst[i + 3] = av_clipf(src[i + 3], min, max);
2402 dst[i + 4] = av_clipf(src[i + 4], min, max);
2403 dst[i + 5] = av_clipf(src[i + 5], min, max);
2404 dst[i + 6] = av_clipf(src[i + 6], min, max);
2405 dst[i + 7] = av_clipf(src[i + 7], min, max);
2406         }
2407     }
2408 }
2410 static int32_t scalarproduct_int16_c(const int16_t * v1, const int16_t * v2, int order)
2411 {
2412 int res = 0;
2414 while (order--)
2415 res += *v1++ * *v2++;
2417 return res;
2418 }
2420 static int32_t scalarproduct_and_madd_int16_c(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul)
2421 {
2422 int res = 0;
2423 while (order--) {
2424 res += *v1 * *v2++;
2425 *v1++ += mul * *v3++;
2426     }
2427 return res;
2428 }
2430 static void apply_window_int16_c(int16_t *output, const int16_t *input,
2431 const int16_t *window, unsigned int len)
2432 {
2433 int i;
2434 int len2 = len >> 1;
2436 for (i = 0; i < len2; i++) {
2437 int16_t w = window[i];
2438 output[i] = (MUL16(input[i], w) + (1 << 14)) >> 15;
2439 output[len-i-1] = (MUL16(input[len-i-1], w) + (1 << 14)) >> 15;
2440     }
2441 }
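/* NOTE (editorial, added): applies a symmetric window in Q15 fixed point:
 * output[i] = round(input[i] * window[i] / 2^15), the (1 << 14) term being
 * the rounding offset. Only len/2 coefficients are read from window[];
 * symmetry supplies the mirrored half (output[len-1-i] reuses window[i]).
 */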
2443 static void vector_clip_int32_c(int32_t *dst, const int32_t *src, int32_t min,
2444 int32_t max, unsigned int len)
2445 {
2446 do {
2447 *dst++ = av_clip(*src++, min, max);
2448 *dst++ = av_clip(*src++, min, max);
2449 *dst++ = av_clip(*src++, min, max);
2450 *dst++ = av_clip(*src++, min, max);
2451 *dst++ = av_clip(*src++, min, max);
2452 *dst++ = av_clip(*src++, min, max);
2453 *dst++ = av_clip(*src++, min, max);
2454 *dst++ = av_clip(*src++, min, max);
2455 len -= 8;
2456 } while (len > 0);
2457 }
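/* NOTE (editorial, added): the do/while above is unrolled by eight with no
 * scalar tail, so callers must pass len as a nonzero multiple of 8; SIMD
 * replacements for this hook are typically written to the same contract.
 */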
2459 static void ff_jref_idct_put(uint8_t *dest, int line_size, int16_t *block)
2460 {
2461 ff_j_rev_dct (block);
2462 put_pixels_clamped_c(block, dest, line_size);
2463 }
2464 static void ff_jref_idct_add(uint8_t *dest, int line_size, int16_t *block)
2465 {
2466 ff_j_rev_dct (block);
2467 add_pixels_clamped_c(block, dest, line_size);
2468 }
2470 /* init static data */
2471 av_cold void ff_dsputil_static_init(void)
2472 {
2473 int i;
2475 for(i=0;i<256;i++) ff_cropTbl[i + MAX_NEG_CROP] = i;
2476 for(i=0;i<MAX_NEG_CROP;i++) {
2477 ff_cropTbl[i] = 0;
2478 ff_cropTbl[i + MAX_NEG_CROP + 256] = 255;
2479     }
2481 for(i=0;i<512;i++) {
2482 ff_squareTbl[i] = (i - 256) * (i - 256);
2483     }
2485 for(i=0; i<64; i++) ff_inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1;
2486 }
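/* NOTE (editorial, added): ff_cropTbl is a clamp-to-[0,255] lookup with
 * MAX_NEG_CROP guard entries on each side, so ff_cropTbl[x + MAX_NEG_CROP]
 * == av_clip_uint8(x) for x in [-MAX_NEG_CROP, 255 + MAX_NEG_CROP).
 * ff_squareTbl is biased by 256: ff_squareTbl[d + 256] == d*d for d in
 * [-256, 255], letting SSE loops square a signed difference with one load.
 */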
2488 int ff_check_alignment(void){
2489 static int did_fail=0;
2490 LOCAL_ALIGNED_16(int, aligned, [4]);
2492 if((intptr_t)aligned & 15){
2493 if(!did_fail){
2494 #if HAVE_MMX || HAVE_ALTIVEC
2495 av_log(NULL, AV_LOG_ERROR,
2496 "Compiler did not align stack variables. Libavcodec has been miscompiled\n"
2497 "and may be very slow or crash. This is not a bug in libavcodec,\n"
2498 "but in the compiler. You may try recompiling using gcc >= 4.2.\n"
2499 "Do not report crashes to Libav developers.\n");
2500 #endif
2501 did_fail=1;
2502         }
2503 return -1;
2504     }
2505 return 0;
2506 }
2508 av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx)
2509 {
2510 ff_check_alignment();
2512 #if CONFIG_ENCODERS
2513 if (avctx->bits_per_raw_sample == 10) {
2514 c->fdct = ff_jpeg_fdct_islow_10;
2515 c->fdct248 = ff_fdct248_islow_10;
2516 } else {
2517 if(avctx->dct_algo==FF_DCT_FASTINT) {
2518 c->fdct = ff_fdct_ifast;
2519 c->fdct248 = ff_fdct_ifast248;
2520     }
2521 else if(avctx->dct_algo==FF_DCT_FAAN) {
2522 c->fdct = ff_faandct;
2523 c->fdct248 = ff_faandct248;
2524     }
2525 else {
2526 c->fdct = ff_jpeg_fdct_islow_8; //slow/accurate/default
2527 c->fdct248 = ff_fdct248_islow_8;
2528         }
2529     }
2530 #endif //CONFIG_ENCODERS
2532 if (avctx->bits_per_raw_sample == 10) {
2533 c->idct_put = ff_simple_idct_put_10;
2534 c->idct_add = ff_simple_idct_add_10;
2535 c->idct = ff_simple_idct_10;
2536 c->idct_permutation_type = FF_NO_IDCT_PERM;
2537 } else {
2538 if(avctx->idct_algo==FF_IDCT_INT){
2539 c->idct_put= ff_jref_idct_put;
2540 c->idct_add= ff_jref_idct_add;
2541 c->idct = ff_j_rev_dct;
2542 c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
2543 }else if(avctx->idct_algo==FF_IDCT_FAAN){
2544 c->idct_put= ff_faanidct_put;
2545 c->idct_add= ff_faanidct_add;
2546 c->idct = ff_faanidct;
2547 c->idct_permutation_type= FF_NO_IDCT_PERM;
2548 }else{ //accurate/default
2549 c->idct_put = ff_simple_idct_put_8;
2550 c->idct_add = ff_simple_idct_add_8;
2551 c->idct = ff_simple_idct_8;
2552 c->idct_permutation_type= FF_NO_IDCT_PERM;
2553         }
2554     }
2556 c->diff_pixels = diff_pixels_c;
2557 c->put_pixels_clamped = put_pixels_clamped_c;
2558 c->put_signed_pixels_clamped = put_signed_pixels_clamped_c;
2559 c->add_pixels_clamped = add_pixels_clamped_c;
2560 c->sum_abs_dctelem = sum_abs_dctelem_c;
2561 c->gmc1 = gmc1_c;
2562 c->gmc = ff_gmc_c;
2563 c->pix_sum = pix_sum_c;
2564 c->pix_norm1 = pix_norm1_c;
2566 c->fill_block_tab[0] = fill_block16_c;
2567 c->fill_block_tab[1] = fill_block8_c;
2569 /* TODO: [0] = 16x16 variants, [1] = 8x8 variants */
2570 c->pix_abs[0][0] = pix_abs16_c;
2571 c->pix_abs[0][1] = pix_abs16_x2_c;
2572 c->pix_abs[0][2] = pix_abs16_y2_c;
2573 c->pix_abs[0][3] = pix_abs16_xy2_c;
2574 c->pix_abs[1][0] = pix_abs8_c;
2575 c->pix_abs[1][1] = pix_abs8_x2_c;
2576 c->pix_abs[1][2] = pix_abs8_y2_c;
2577 c->pix_abs[1][3] = pix_abs8_xy2_c;
2579 c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c;
2580 c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c;
2581 c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c;
2582 c->put_tpel_pixels_tab[ 4] = put_tpel_pixels_mc01_c;
2583 c->put_tpel_pixels_tab[ 5] = put_tpel_pixels_mc11_c;
2584 c->put_tpel_pixels_tab[ 6] = put_tpel_pixels_mc21_c;
2585 c->put_tpel_pixels_tab[ 8] = put_tpel_pixels_mc02_c;
2586 c->put_tpel_pixels_tab[ 9] = put_tpel_pixels_mc12_c;
2587 c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c;
2589 c->avg_tpel_pixels_tab[ 0] = avg_tpel_pixels_mc00_c;
2590 c->avg_tpel_pixels_tab[ 1] = avg_tpel_pixels_mc10_c;
2591 c->avg_tpel_pixels_tab[ 2] = avg_tpel_pixels_mc20_c;
2592 c->avg_tpel_pixels_tab[ 4] = avg_tpel_pixels_mc01_c;
2593 c->avg_tpel_pixels_tab[ 5] = avg_tpel_pixels_mc11_c;
2594 c->avg_tpel_pixels_tab[ 6] = avg_tpel_pixels_mc21_c;
2595 c->avg_tpel_pixels_tab[ 8] = avg_tpel_pixels_mc02_c;
2596 c->avg_tpel_pixels_tab[ 9] = avg_tpel_pixels_mc12_c;
2597 c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c;
2599 #define dspfunc(PFX, IDX, NUM) \
2600 c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \
2601 c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \
2602 c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \
2603 c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \
2604 c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \
2605 c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \
2606 c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \
2607 c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \
2608 c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \
2609 c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \
2610 c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
2611 c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
2612 c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
2613 c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
2614 c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
2615 c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c
2617 dspfunc(put_qpel, 0, 16);
2618 dspfunc(put_no_rnd_qpel, 0, 16);
2620 dspfunc(avg_qpel, 0, 16);
2621 /* dspfunc(avg_no_rnd_qpel, 0, 16); */
2623 dspfunc(put_qpel, 1, 8);
2624 dspfunc(put_no_rnd_qpel, 1, 8);
2626 dspfunc(avg_qpel, 1, 8);
2627 /* dspfunc(avg_no_rnd_qpel, 1, 8); */
2629 #undef dspfunc
2631 c->put_mspel_pixels_tab[0]= ff_put_pixels8x8_c;
2632 c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c;
2633 c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c;
2634 c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c;
2635 c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c;
2636 c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c;
2637 c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c;
2638 c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c;
2640 #define SET_CMP_FUNC(name) \
2641 c->name[0]= name ## 16_c;\
2642 c->name[1]= name ## 8x8_c;
2644 SET_CMP_FUNC(hadamard8_diff)
2645 c->hadamard8_diff[4]= hadamard8_intra16_c;
2646 c->hadamard8_diff[5]= hadamard8_intra8x8_c;
2647 SET_CMP_FUNC(dct_sad)
2648 SET_CMP_FUNC(dct_max)
2649 #if CONFIG_GPL
2650 SET_CMP_FUNC(dct264_sad)
2651 #endif
2652 c->sad[0]= pix_abs16_c;
2653 c->sad[1]= pix_abs8_c;
2654 c->sse[0]= sse16_c;
2655 c->sse[1]= sse8_c;
2656 c->sse[2]= sse4_c;
2657 SET_CMP_FUNC(quant_psnr)
2658 SET_CMP_FUNC(rd)
2659 SET_CMP_FUNC(bit)
2660 c->vsad[0]= vsad16_c;
2661 c->vsad[4]= vsad_intra16_c;
2662 c->vsad[5]= vsad_intra8_c;
2663 c->vsse[0]= vsse16_c;
2664 c->vsse[4]= vsse_intra16_c;
2665 c->vsse[5]= vsse_intra8_c;
2666 c->nsse[0]= nsse16_c;
2667 c->nsse[1]= nsse8_c;
2669 c->ssd_int8_vs_int16 = ssd_int8_vs_int16_c;
2671 c->add_bytes= add_bytes_c;
2672 c->diff_bytes= diff_bytes_c;
2673 c->add_hfyu_median_prediction= add_hfyu_median_prediction_c;
2674 c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c;
2675 c->add_hfyu_left_prediction = add_hfyu_left_prediction_c;
2676 c->add_hfyu_left_prediction_bgr32 = add_hfyu_left_prediction_bgr32_c;
2677 c->bswap_buf= bswap_buf;
2678 c->bswap16_buf = bswap16_buf;
2680 if (CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
2681 c->h263_h_loop_filter= h263_h_loop_filter_c;
2682 c->h263_v_loop_filter= h263_v_loop_filter_c;
2683     }
2685 c->h261_loop_filter= h261_loop_filter_c;
2687 c->try_8x8basis= try_8x8basis_c;
2688 c->add_8x8basis= add_8x8basis_c;
2690 c->vector_clipf = vector_clipf_c;
2691 c->scalarproduct_int16 = scalarproduct_int16_c;
2692 c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_c;
2693 c->apply_window_int16 = apply_window_int16_c;
2694 c->vector_clip_int32 = vector_clip_int32_c;
2696 c->shrink[0]= av_image_copy_plane;
2697 c->shrink[1]= ff_shrink22;
2698 c->shrink[2]= ff_shrink44;
2699 c->shrink[3]= ff_shrink88;
2701 c->add_pixels8 = add_pixels8_c;
2703 #define hpel_funcs(prefix, idx, num) \
2704 c->prefix ## _pixels_tab idx [0] = prefix ## _pixels ## num ## _8_c; \
2705 c->prefix ## _pixels_tab idx [1] = prefix ## _pixels ## num ## _x2_8_c; \
2706 c->prefix ## _pixels_tab idx [2] = prefix ## _pixels ## num ## _y2_8_c; \
2707 c->prefix ## _pixels_tab idx [3] = prefix ## _pixels ## num ## _xy2_8_c
2709 hpel_funcs(put, [0], 16);
2710 hpel_funcs(put, [1], 8);
2711 hpel_funcs(put, [2], 4);
2712 hpel_funcs(put, [3], 2);
2713 hpel_funcs(put_no_rnd, [0], 16);
2714 hpel_funcs(put_no_rnd, [1], 8);
2715 hpel_funcs(avg, [0], 16);
2716 hpel_funcs(avg, [1], 8);
2717 hpel_funcs(avg, [2], 4);
2718 hpel_funcs(avg, [3], 2);
2719 hpel_funcs(avg_no_rnd,, 16);
2721 #undef FUNC
2722 #undef FUNCC
2723 #define FUNC(f, depth) f ## _ ## depth
2724 #define FUNCC(f, depth) f ## _ ## depth ## _c
2726 #define BIT_DEPTH_FUNCS(depth, dct)\
2727 c->get_pixels = FUNCC(get_pixels ## dct , depth);\
2728 c->draw_edges = FUNCC(draw_edges , depth);\
2729 c->clear_block = FUNCC(clear_block ## dct , depth);\
2730 c->clear_blocks = FUNCC(clear_blocks ## dct , depth);\
2732 switch (avctx->bits_per_raw_sample) {
2733 case 9:
2734 if (c->dct_bits == 32) {
2735 BIT_DEPTH_FUNCS(9, _32);
2736 } else {
2737 BIT_DEPTH_FUNCS(9, _16);
2738         }
2739 break;
2740 case 10:
2741 if (c->dct_bits == 32) {
2742 BIT_DEPTH_FUNCS(10, _32);
2743 } else {
2744 BIT_DEPTH_FUNCS(10, _16);
2745         }
2746 break;
2747 default:
2748 BIT_DEPTH_FUNCS(8, _16);
2749 break;
2750     }
2753 if (HAVE_MMX) ff_dsputil_init_mmx (c, avctx);
2754 if (ARCH_ARM) ff_dsputil_init_arm (c, avctx);
2755 if (HAVE_VIS) ff_dsputil_init_vis (c, avctx);
2756 if (ARCH_ALPHA) ff_dsputil_init_alpha (c, avctx);
2757 if (ARCH_PPC) ff_dsputil_init_ppc (c, avctx);
2758 if (ARCH_SH4) ff_dsputil_init_sh4 (c, avctx);
2759 if (ARCH_BFIN) ff_dsputil_init_bfin (c, avctx);
2761 ff_init_scantable_permutation(c->idct_permutation,
2762 c->idct_permutation_type);
2763 }