/*
 * VC-1 and WMV3 decoder - DSP functions
 * Copyright (c) 2006 Konstantin Shishkov
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * VC-1 and WMV3 decoder
 */
#include <assert.h>

#include "libavutil/common.h"
#include "h264chroma.h"
#include "vc1dsp.h"
/** Apply overlap transform to horizontal edge
*/
static void vc1_v_overlap_c(uint8_t* src, int stride)
{
    int i;
    int a, b, c, d;
    int d1, d2;
    int rnd = 1;
    for(i = 0; i < 8; i++) {
        a = src[-2*stride];
        b = src[-stride];
        c = src[0];
        d = src[stride];
        d1 = (a - d + 3 + rnd) >> 3;
        d2 = (a - d + b - c + 4 - rnd) >> 3;

        src[-2*stride] = a - d1;
        src[-stride]   = av_clip_uint8(b - d2);
        src[0]         = av_clip_uint8(c + d2);
        src[stride]    = d + d1;
        src++;
        rnd = !rnd;
    }
}
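
/*
 * Worked example for the overlap filter above (values chosen purely for
 * illustration): with a=100, b=90, c=60, d=50 and rnd=0, d1 = (50+3)>>3 = 6
 * and d2 = (50+30+4)>>3 = 10, so the pixels become (94, 80, 70, 56). The
 * 30-level step across the block edge shrinks to 10 while the sum of the
 * four pixels (300) is preserved, i.e. the edge is smoothed without
 * shifting the local DC level.
 */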
/** Apply overlap transform to vertical edge
*/
static void vc1_h_overlap_c(uint8_t* src, int stride)
{
    int i;
    int a, b, c, d;
    int d1, d2;
    int rnd = 1;
    for(i = 0; i < 8; i++) {
        a = src[-2];
        b = src[-1];
        c = src[0];
        d = src[1];
        d1 = (a - d + 3 + rnd) >> 3;
        d2 = (a - d + b - c + 4 - rnd) >> 3;

        src[-2] = a - d1;
        src[-1] = av_clip_uint8(b - d2);
        src[0]  = av_clip_uint8(c + d2);
        src[1]  = d + d1;
        src += stride;
        rnd = !rnd;
    }
}
static void vc1_v_s_overlap_c(int16_t *top, int16_t *bottom)
{
    int i;
    int a, b, c, d;
    int d1, d2;
    int rnd1 = 4, rnd2 = 3;
    for(i = 0; i < 8; i++) {
        a = top[48];
        b = top[56];
        c = bottom[0];
        d = bottom[8];
        d1 = a - d;
        d2 = a - d + b - c;

        top[48]   = ((a << 3) - d1 + rnd1) >> 3;
        top[56]   = ((b << 3) - d2 + rnd2) >> 3;
        bottom[0] = ((c << 3) + d2 + rnd1) >> 3;
        bottom[8] = ((d << 3) + d1 + rnd2) >> 3;

        bottom++;
        top++;
        rnd2 = 7 - rnd2;
        rnd1 = 7 - rnd1;
    }
}
static void vc1_h_s_overlap_c(int16_t *left, int16_t *right)
{
    int i;
    int a, b, c, d;
    int d1, d2;
    int rnd1 = 4, rnd2 = 3;
    for(i = 0; i < 8; i++) {
        a = left[6];
        b = left[7];
        c = right[0];
        d = right[1];
        d1 = a - d;
        d2 = a - d + b - c;

        left[6]  = ((a << 3) - d1 + rnd1) >> 3;
        left[7]  = ((b << 3) - d2 + rnd2) >> 3;
        right[0] = ((c << 3) + d2 + rnd1) >> 3;
        right[1] = ((d << 3) + d1 + rnd2) >> 3;

        left  += 8;
        right += 8;
        rnd2 = 7 - rnd2;
        rnd1 = 7 - rnd1;
    }
}
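
/*
 * The two *_s_* variants above are the 16-bit versions of the same overlap
 * smoothing, applied to coefficients before the final rounding stage: the
 * values stay scaled by 8 (the "<< 3"), and instead of toggling a single
 * rnd bit, the rounding constants rnd1/rnd2 swap between 4/3 and 3/4 on
 * every line (rnd = 7 - rnd).
 */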
/**
 * VC-1 in-loop deblocking filter for one line
 * @param src pointer to the first pixel past the edge; the filter reads
 *            src[-4*stride] .. src[3*stride]
 * @param stride distance between pixels across the edge
 * @param pq block quantizer
 * @return whether the other 3 pairs should be filtered or not
 */
static av_always_inline int vc1_filter_line(uint8_t* src, int stride, int pq){
    int a0 = (2*(src[-2*stride] - src[ 1*stride]) - 5*(src[-1*stride] - src[ 0*stride]) + 4) >> 3;
    int a0_sign = a0 >> 31;        /* Store sign */
    a0 = (a0 ^ a0_sign) - a0_sign; /* a0 = FFABS(a0); */
    if(a0 < pq){
        int a1 = FFABS((2*(src[-4*stride] - src[-1*stride]) - 5*(src[-3*stride] - src[-2*stride]) + 4) >> 3);
        int a2 = FFABS((2*(src[ 0*stride] - src[ 3*stride]) - 5*(src[ 1*stride] - src[ 2*stride]) + 4) >> 3);
        if(a1 < a0 || a2 < a0){
            int clip = src[-1*stride] - src[ 0*stride];
            int clip_sign = clip >> 31;
            clip = ((clip ^ clip_sign) - clip_sign)>>1;
            if(clip){
                int a3 = FFMIN(a1, a2);
                int d = 5 * (a3 - a0);
                int d_sign = (d >> 31);
                d = ((d ^ d_sign) - d_sign) >> 3;
                d_sign ^= a0_sign ^ clip_sign;

                if( d_sign ^ clip_sign )
                    d = 0;
                else{
                    d = FFMIN(d, clip);
                    d = (d ^ d_sign) - d_sign; /* Restore sign */
                    src[-1*stride] = av_clip_uint8(src[-1*stride] - d);
                    src[ 0*stride] = av_clip_uint8(src[ 0*stride] + d);
                }
                return 1;
            }
        }
    }
    return 0;
}
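
/*
 * Summary of the decision logic above: a0 is the filtered difference across
 * the edge, a1/a2 the same metric one pixel pair to either side. A
 * correction is applied only when a0 is below the quantizer threshold pq
 * and the edge is sharper than its neighbourhood (a1 < a0 || a2 < a0); the
 * correction d is additionally capped at half the pixel difference (clip),
 * so the edge can be flattened but never over-corrected past equality.
 */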
/**
 * VC-1 in-loop deblocking filter
 * @param src pointer to the source block edge
 * @param step distance between horizontally adjacent elements
 * @param stride distance between vertically adjacent elements
 * @param len edge length to filter (4, 8 or 16 pixels)
 * @param pq block quantizer
 */
static inline void vc1_loop_filter(uint8_t* src, int step, int stride, int len, int pq)
{
    int i;
    int filt3;

    for(i = 0; i < len; i += 4){
        filt3 = vc1_filter_line(src + 2*step, stride, pq);
        if(filt3){
            vc1_filter_line(src + 0*step, stride, pq);
            vc1_filter_line(src + 1*step, stride, pq);
            vc1_filter_line(src + 3*step, stride, pq);
        }
        src += step * 4;
    }
}
static void vc1_v_loop_filter4_c(uint8_t *src, int stride, int pq)
{
    vc1_loop_filter(src, 1, stride, 4, pq);
}

static void vc1_h_loop_filter4_c(uint8_t *src, int stride, int pq)
{
    vc1_loop_filter(src, stride, 1, 4, pq);
}

static void vc1_v_loop_filter8_c(uint8_t *src, int stride, int pq)
{
    vc1_loop_filter(src, 1, stride, 8, pq);
}

static void vc1_h_loop_filter8_c(uint8_t *src, int stride, int pq)
{
    vc1_loop_filter(src, stride, 1, 8, pq);
}

static void vc1_v_loop_filter16_c(uint8_t *src, int stride, int pq)
{
    vc1_loop_filter(src, 1, stride, 16, pq);
}

static void vc1_h_loop_filter16_c(uint8_t *src, int stride, int pq)
{
    vc1_loop_filter(src, stride, 1, 16, pq);
}
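
/*
 * The six wrappers above differ only in the (step, stride) pair handed to
 * vc1_loop_filter and in the edge length (4, 8 or 16): the _v_ versions
 * step along the edge one pixel at a time (step 1) and sample across it
 * with the picture stride, while the _h_ versions do the opposite.
 */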
/** Do inverse transform on 8x8 block
*/
static void vc1_inv_trans_8x8_dc_c(uint8_t *dest, int linesize, int16_t *block)
{
    int i;
    int dc = block[0];
    dc = (3 * dc +  1) >> 1;
    dc = (3 * dc + 16) >> 5;
    for(i = 0; i < 8; i++){
        dest[0] = av_clip_uint8(dest[0] + dc);
        dest[1] = av_clip_uint8(dest[1] + dc);
        dest[2] = av_clip_uint8(dest[2] + dc);
        dest[3] = av_clip_uint8(dest[3] + dc);
        dest[4] = av_clip_uint8(dest[4] + dc);
        dest[5] = av_clip_uint8(dest[5] + dc);
        dest[6] = av_clip_uint8(dest[6] + dc);
        dest[7] = av_clip_uint8(dest[7] + dc);
        dest += linesize;
    }
}
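
/*
 * Note on the DC shortcut above: with only block[0] nonzero, both passes
 * of the full 8x8 transform collapse to constants, and
 * (12*dc + 4) >> 3 == (3*dc + 1) >> 1 and
 * (12*dc + 64) >> 7 == (3*dc + 16) >> 5,
 * which is exactly the pair of scalings applied to dc here.
 */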
static void vc1_inv_trans_8x8_c(int16_t block[64])
{
    int i;
    register int t1,t2,t3,t4,t5,t6,t7,t8;
    int16_t *src, *dst, temp[64];

    src = block;
    dst = temp;
    for(i = 0; i < 8; i++){
        t1 = 12 * (src[ 0] + src[32]) + 4;
        t2 = 12 * (src[ 0] - src[32]) + 4;
        t3 = 16 * src[16] +  6 * src[48];
        t4 =  6 * src[16] - 16 * src[48];

        t5 = t1 + t3;
        t6 = t2 + t4;
        t7 = t2 - t4;
        t8 = t1 - t3;

        t1 = 16 * src[ 8] + 15 * src[24] +  9 * src[40] +  4 * src[56];
        t2 = 15 * src[ 8] -  4 * src[24] - 16 * src[40] -  9 * src[56];
        t3 =  9 * src[ 8] - 16 * src[24] +  4 * src[40] + 15 * src[56];
        t4 =  4 * src[ 8] -  9 * src[24] + 15 * src[40] - 16 * src[56];

        dst[0] = (t5 + t1) >> 3;
        dst[1] = (t6 + t2) >> 3;
        dst[2] = (t7 + t3) >> 3;
        dst[3] = (t8 + t4) >> 3;
        dst[4] = (t8 - t4) >> 3;
        dst[5] = (t7 - t3) >> 3;
        dst[6] = (t6 - t2) >> 3;
        dst[7] = (t5 - t1) >> 3;

        src += 1;
        dst += 8;
    }

    src = temp;
    dst = block;
    for(i = 0; i < 8; i++){
        t1 = 12 * (src[ 0] + src[32]) + 64;
        t2 = 12 * (src[ 0] - src[32]) + 64;
        t3 = 16 * src[16] +  6 * src[48];
        t4 =  6 * src[16] - 16 * src[48];

        t5 = t1 + t3;
        t6 = t2 + t4;
        t7 = t2 - t4;
        t8 = t1 - t3;

        t1 = 16 * src[ 8] + 15 * src[24] +  9 * src[40] +  4 * src[56];
        t2 = 15 * src[ 8] -  4 * src[24] - 16 * src[40] -  9 * src[56];
        t3 =  9 * src[ 8] - 16 * src[24] +  4 * src[40] + 15 * src[56];
        t4 =  4 * src[ 8] -  9 * src[24] + 15 * src[40] - 16 * src[56];

        dst[ 0] = (t5 + t1) >> 7;
        dst[ 8] = (t6 + t2) >> 7;
        dst[16] = (t7 + t3) >> 7;
        dst[24] = (t8 + t4) >> 7;
        dst[32] = (t8 - t4 + 1) >> 7;
        dst[40] = (t7 - t3 + 1) >> 7;
        dst[48] = (t6 - t2 + 1) >> 7;
        dst[56] = (t5 - t1 + 1) >> 7;

        src++;
        dst++;
    }
}
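
/*
 * Rounding structure of the transform above: the first pass biases by 4
 * and drops 3 bits, the second by 64 and drops 7 bits; the extra "+ 1" on
 * the second half of the outputs in the second pass implements the
 * asymmetric rounding the VC-1 format requires for bit-exact
 * reconstruction.
 */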
/** Do inverse transform on 8x4 part of block
*/
static void vc1_inv_trans_8x4_dc_c(uint8_t *dest, int linesize, int16_t *block)
{
    int i;
    int dc = block[0];
    dc = ( 3 * dc +  1) >> 1;
    dc = (17 * dc + 64) >> 7;
    for(i = 0; i < 4; i++){
        dest[0] = av_clip_uint8(dest[0] + dc);
        dest[1] = av_clip_uint8(dest[1] + dc);
        dest[2] = av_clip_uint8(dest[2] + dc);
        dest[3] = av_clip_uint8(dest[3] + dc);
        dest[4] = av_clip_uint8(dest[4] + dc);
        dest[5] = av_clip_uint8(dest[5] + dc);
        dest[6] = av_clip_uint8(dest[6] + dc);
        dest[7] = av_clip_uint8(dest[7] + dc);
        dest += linesize;
    }
}
static void vc1_inv_trans_8x4_c(uint8_t *dest, int linesize, int16_t *block)
{
    int i;
    register int t1,t2,t3,t4,t5,t6,t7,t8;
    int16_t *src, *dst;

    src = block;
    dst = block;
    for(i = 0; i < 4; i++){
        t1 = 12 * (src[0] + src[4]) + 4;
        t2 = 12 * (src[0] - src[4]) + 4;
        t3 = 16 * src[2] +  6 * src[6];
        t4 =  6 * src[2] - 16 * src[6];

        t5 = t1 + t3;
        t6 = t2 + t4;
        t7 = t2 - t4;
        t8 = t1 - t3;

        t1 = 16 * src[1] + 15 * src[3] +  9 * src[5] +  4 * src[7];
        t2 = 15 * src[1] -  4 * src[3] - 16 * src[5] -  9 * src[7];
        t3 =  9 * src[1] - 16 * src[3] +  4 * src[5] + 15 * src[7];
        t4 =  4 * src[1] -  9 * src[3] + 15 * src[5] - 16 * src[7];

        dst[0] = (t5 + t1) >> 3;
        dst[1] = (t6 + t2) >> 3;
        dst[2] = (t7 + t3) >> 3;
        dst[3] = (t8 + t4) >> 3;
        dst[4] = (t8 - t4) >> 3;
        dst[5] = (t7 - t3) >> 3;
        dst[6] = (t6 - t2) >> 3;
        dst[7] = (t5 - t1) >> 3;

        src += 8;
        dst += 8;
    }

    src = block;
    for(i = 0; i < 8; i++){
        t1 = 17 * (src[ 0] + src[16]) + 64;
        t2 = 17 * (src[ 0] - src[16]) + 64;
        t3 = 22 * src[ 8] + 10 * src[24];
        t4 = 22 * src[24] - 10 * src[ 8];

        dest[0*linesize] = av_clip_uint8(dest[0*linesize] + ((t1 + t3) >> 7));
        dest[1*linesize] = av_clip_uint8(dest[1*linesize] + ((t2 - t4) >> 7));
        dest[2*linesize] = av_clip_uint8(dest[2*linesize] + ((t2 + t4) >> 7));
        dest[3*linesize] = av_clip_uint8(dest[3*linesize] + ((t1 - t3) >> 7));

        src++;
        dest++;
    }
}
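
/*
 * Structure of the 8x4 variant above: the 8-point transform runs over each
 * of the 4 rows in place, then the 4-point (17, 22, 10) transform runs
 * down each of the 8 columns, adding the result directly to dest with
 * clipping.
 */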
/** Do inverse transform on 4x8 parts of block
*/
static void vc1_inv_trans_4x8_dc_c(uint8_t *dest, int linesize, int16_t *block)
{
    int i;
    int dc = block[0];
    dc = (17 * dc +  4) >> 3;
    dc = (12 * dc + 64) >> 7;
    for(i = 0; i < 8; i++){
        dest[0] = av_clip_uint8(dest[0] + dc);
        dest[1] = av_clip_uint8(dest[1] + dc);
        dest[2] = av_clip_uint8(dest[2] + dc);
        dest[3] = av_clip_uint8(dest[3] + dc);
        dest += linesize;
    }
}
static void vc1_inv_trans_4x8_c(uint8_t *dest, int linesize, int16_t *block)
{
    int i;
    register int t1,t2,t3,t4,t5,t6,t7,t8;
    int16_t *src, *dst;

    src = block;
    dst = block;
    for(i = 0; i < 8; i++){
        t1 = 17 * (src[0] + src[2]) + 4;
        t2 = 17 * (src[0] - src[2]) + 4;
        t3 = 22 * src[1] + 10 * src[3];
        t4 = 22 * src[3] - 10 * src[1];

        dst[0] = (t1 + t3) >> 3;
        dst[1] = (t2 - t4) >> 3;
        dst[2] = (t2 + t4) >> 3;
        dst[3] = (t1 - t3) >> 3;

        src += 8;
        dst += 8;
    }

    src = block;
    for(i = 0; i < 4; i++){
        t1 = 12 * (src[ 0] + src[32]) + 64;
        t2 = 12 * (src[ 0] - src[32]) + 64;
        t3 = 16 * src[16] +  6 * src[48];
        t4 =  6 * src[16] - 16 * src[48];

        t5 = t1 + t3;
        t6 = t2 + t4;
        t7 = t2 - t4;
        t8 = t1 - t3;

        t1 = 16 * src[ 8] + 15 * src[24] +  9 * src[40] +  4 * src[56];
        t2 = 15 * src[ 8] -  4 * src[24] - 16 * src[40] -  9 * src[56];
        t3 =  9 * src[ 8] - 16 * src[24] +  4 * src[40] + 15 * src[56];
        t4 =  4 * src[ 8] -  9 * src[24] + 15 * src[40] - 16 * src[56];

        dest[0*linesize] = av_clip_uint8(dest[0*linesize] + ((t5 + t1) >> 7));
        dest[1*linesize] = av_clip_uint8(dest[1*linesize] + ((t6 + t2) >> 7));
        dest[2*linesize] = av_clip_uint8(dest[2*linesize] + ((t7 + t3) >> 7));
        dest[3*linesize] = av_clip_uint8(dest[3*linesize] + ((t8 + t4) >> 7));
        dest[4*linesize] = av_clip_uint8(dest[4*linesize] + ((t8 - t4 + 1) >> 7));
        dest[5*linesize] = av_clip_uint8(dest[5*linesize] + ((t7 - t3 + 1) >> 7));
        dest[6*linesize] = av_clip_uint8(dest[6*linesize] + ((t6 - t2 + 1) >> 7));
        dest[7*linesize] = av_clip_uint8(dest[7*linesize] + ((t5 - t1 + 1) >> 7));

        src++;
        dest++;
    }
}
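
/*
 * Mirror image of the 8x4 case: here the 4-point transform is applied to
 * each of the 8 rows first, then the 8-point transform runs down the 4
 * columns, again accumulating into dest with clipping.
 */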
/** Do inverse transform on 4x4 part of block
*/
static void vc1_inv_trans_4x4_dc_c(uint8_t *dest, int linesize, int16_t *block)
{
    int i;
    int dc = block[0];
    dc = (17 * dc +  4) >> 3;
    dc = (17 * dc + 64) >> 7;
    for(i = 0; i < 4; i++){
        dest[0] = av_clip_uint8(dest[0] + dc);
        dest[1] = av_clip_uint8(dest[1] + dc);
        dest[2] = av_clip_uint8(dest[2] + dc);
        dest[3] = av_clip_uint8(dest[3] + dc);
        dest += linesize;
    }
}
static void vc1_inv_trans_4x4_c(uint8_t *dest, int linesize, int16_t *block)
{
    int i;
    register int t1,t2,t3,t4;
    int16_t *src, *dst;

    src = block;
    dst = block;
    for(i = 0; i < 4; i++){
        t1 = 17 * (src[0] + src[2]) + 4;
        t2 = 17 * (src[0] - src[2]) + 4;
        t3 = 22 * src[1] + 10 * src[3];
        t4 = 22 * src[3] - 10 * src[1];

        dst[0] = (t1 + t3) >> 3;
        dst[1] = (t2 - t4) >> 3;
        dst[2] = (t2 + t4) >> 3;
        dst[3] = (t1 - t3) >> 3;

        src += 8;
        dst += 8;
    }

    src = block;
    for(i = 0; i < 4; i++){
        t1 = 17 * (src[ 0] + src[16]) + 64;
        t2 = 17 * (src[ 0] - src[16]) + 64;
        t3 = 22 * src[ 8] + 10 * src[24];
        t4 = 22 * src[24] - 10 * src[ 8];

        dest[0*linesize] = av_clip_uint8(dest[0*linesize] + ((t1 + t3) >> 7));
        dest[1*linesize] = av_clip_uint8(dest[1*linesize] + ((t2 - t4) >> 7));
        dest[2*linesize] = av_clip_uint8(dest[2*linesize] + ((t2 + t4) >> 7));
        dest[3*linesize] = av_clip_uint8(dest[3*linesize] + ((t1 - t3) >> 7));

        src++;
        dest++;
    }
}
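
/*
 * The 4x4 transform uses the same (17, 22, 10) basis in both directions,
 * so only t1-t4 are needed.
 */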
/* motion compensation functions */
/** 16-bit intermediate filter, used when both a horizontal and a vertical
 *  filter have to be applied */
#define VC1_MSPEL_FILTER_16B(DIR, TYPE) \
static av_always_inline int vc1_mspel_ ## DIR ## _filter_16bits(const TYPE *src, int stride, int mode) \
{ \
    switch(mode){ \
    case 0: /* no shift - should not occur */ \
        return 0; \
    case 1: /* 1/4 shift */ \
        return -4*src[-stride] + 53*src[0] + 18*src[stride] - 3*src[stride*2]; \
    case 2: /* 1/2 shift */ \
        return -src[-stride] + 9*src[0] + 9*src[stride] - src[stride*2]; \
    case 3: /* 3/4 shift */ \
        return -3*src[-stride] + 18*src[0] + 53*src[stride] - 4*src[stride*2]; \
    } \
    return 0; /* should not occur */ \
}
VC1_MSPEL_FILTER_16B(ver, uint8_t)
VC1_MSPEL_FILTER_16B(hor, int16_t)
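
/*
 * Two instantiations: the "ver" filter reads 8-bit pixels column-wise, the
 * "hor" filter re-filters the resulting 16-bit intermediate row-wise.
 * Neither normalizes its result; the caller adds the rounding term and
 * shifts.
 */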
/** Filter used to interpolate fractional pel values
 */
static av_always_inline int vc1_mspel_filter(const uint8_t *src, int stride, int mode, int r)
{
    switch(mode){
    case 0: // no shift
        return src[0];
    case 1: // 1/4 shift
        return (-4*src[-stride] + 53*src[0] + 18*src[stride] - 3*src[stride*2] + 32 - r) >> 6;
    case 2: // 1/2 shift
        return (-src[-stride] + 9*src[0] + 9*src[stride] - src[stride*2] + 8 - r) >> 4;
    case 3: // 3/4 shift
        return (-3*src[-stride] + 18*src[0] + 53*src[stride] - 4*src[stride*2] + 32 - r) >> 6;
    }
    return 0; // should not occur
}
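
/*
 * The tap sets above sum to 64 (-4+53+18-3) for the quarter-pel cases and
 * to 16 (-1+9+9-1) for the half-pel case, so the >> 6 / >> 4 shifts
 * renormalize the result; r folds the VC-1 rounding-control flag into the
 * bias.
 */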
/** Function used to do motion compensation with bicubic interpolation
 */
#define VC1_MSPEL_MC(OP, OPNAME)\
static av_always_inline void OPNAME ## vc1_mspel_mc(uint8_t *dst, const uint8_t *src, int stride, int hmode, int vmode, int rnd)\
{\
    int i, j;\
\
    if (vmode) { /* Vertical filter to apply */\
        int r;\
\
        if (hmode) { /* Horizontal filter also applies; filter vertically into tmp first */\
            static const int shift_value[] = { 0, 5, 1, 5 };\
            int shift = (shift_value[hmode]+shift_value[vmode])>>1;\
            int16_t tmp[11*8], *tptr = tmp;\
\
            r = (1<<(shift-1)) + rnd-1;\
\
            src -= 1;\
            for(j = 0; j < 8; j++) {\
                for(i = 0; i < 11; i++)\
                    tptr[i] = (vc1_mspel_ver_filter_16bits(src + i, stride, vmode)+r)>>shift;\
                src += stride;\
                tptr += 11;\
            }\
\
            r = 64-rnd;\
            tptr = tmp+1;\
            for(j = 0; j < 8; j++) {\
                for(i = 0; i < 8; i++)\
                    OP(dst[i], (vc1_mspel_hor_filter_16bits(tptr + i, 1, hmode)+r)>>7);\
                dst += stride;\
                tptr += 11;\
            }\
\
            return;\
        }\
        else { /* No horizontal filter, output 8 lines to dst */\
            r = 1-rnd;\
\
            for(j = 0; j < 8; j++) {\
                for(i = 0; i < 8; i++)\
                    OP(dst[i], vc1_mspel_filter(src + i, stride, vmode, r));\
                src += stride;\
                dst += stride;\
            }\
            return;\
        }\
    }\
\
    /* Horizontal mode with no vertical mode */\
    for(j = 0; j < 8; j++) {\
        for(i = 0; i < 8; i++)\
            OP(dst[i], vc1_mspel_filter(src + i, 1, hmode, rnd));\
        dst += stride;\
        src += stride;\
    }\
}

#define op_put(a, b) a = av_clip_uint8(b)
#define op_avg(a, b) a = (a + av_clip_uint8(b) + 1) >> 1
VC1_MSPEL_MC(op_put, put_)
VC1_MSPEL_MC(op_avg, avg_)
/* pixel functions - really are entry points to vc1_mspel_mc */

#define PUT_VC1_MSPEL(a, b)\
static void put_vc1_mspel_mc ## a ## b ##_c(uint8_t *dst, const uint8_t *src, int stride, int rnd) { \
     put_vc1_mspel_mc(dst, src, stride, a, b, rnd); \
}\
static void avg_vc1_mspel_mc ## a ## b ##_c(uint8_t *dst, const uint8_t *src, int stride, int rnd) { \
     avg_vc1_mspel_mc(dst, src, stride, a, b, rnd); \
}

PUT_VC1_MSPEL(1, 0)
PUT_VC1_MSPEL(2, 0)
PUT_VC1_MSPEL(3, 0)

PUT_VC1_MSPEL(0, 1)
PUT_VC1_MSPEL(1, 1)
PUT_VC1_MSPEL(2, 1)
PUT_VC1_MSPEL(3, 1)

PUT_VC1_MSPEL(0, 2)
PUT_VC1_MSPEL(1, 2)
PUT_VC1_MSPEL(2, 2)
PUT_VC1_MSPEL(3, 2)

PUT_VC1_MSPEL(0, 3)
PUT_VC1_MSPEL(1, 3)
PUT_VC1_MSPEL(2, 3)
PUT_VC1_MSPEL(3, 3)
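
/*
 * The 15 PUT_VC1_MSPEL instantiations, together with the whole-pel copy in
 * slot 0, fill a 16-entry table indexed by (vmode << 2) | hmode:
 * put_vc1_mspel_mc ## hmode ## vmode ## _c lands in
 * put_vc1_mspel_pixels_tab[vmode * 4 + hmode] in ff_vc1dsp_init() below.
 */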
static void put_no_rnd_vc1_chroma_mc8_c(uint8_t *dst /*align 8*/, uint8_t *src /*align 1*/, int stride, int h, int x, int y){
    const int A = (8-x)*(8-y);
    const int B = (  x)*(8-y);
    const int C = (8-x)*(  y);
    const int D = (  x)*(  y);
    int i;

    assert(x<8 && y<8 && x>=0 && y>=0);

    for(i = 0; i < h; i++)
    {
        dst[0] = (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + 32 - 4) >> 6;
        dst[1] = (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + 32 - 4) >> 6;
        dst[2] = (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + 32 - 4) >> 6;
        dst[3] = (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + 32 - 4) >> 6;
        dst[4] = (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5] + 32 - 4) >> 6;
        dst[5] = (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6] + 32 - 4) >> 6;
        dst[6] = (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7] + 32 - 4) >> 6;
        dst[7] = (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] + 32 - 4) >> 6;
        dst += stride;
        src += stride;
    }
}
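
/*
 * The bilinear weights above always satisfy A+B+C+D = 64, so >> 6
 * renormalizes; the "no_rnd" bias of 32 - 4 = 28 (instead of the usual 32)
 * is the reduced rounding these no_rnd chroma variants implement.
 */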
static void put_no_rnd_vc1_chroma_mc4_c(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y){
    const int A = (8-x)*(8-y);
    const int B = (  x)*(8-y);
    const int C = (8-x)*(  y);
    const int D = (  x)*(  y);
    int i;

    assert(x<8 && y<8 && x>=0 && y>=0);

    for(i = 0; i < h; i++)
    {
        dst[0] = (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + 32 - 4) >> 6;
        dst[1] = (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + 32 - 4) >> 6;
        dst[2] = (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + 32 - 4) >> 6;
        dst[3] = (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + 32 - 4) >> 6;
        dst += stride;
        src += stride;
    }
}
#define avg2(a,b) ((a+b+1)>>1)
static void avg_no_rnd_vc1_chroma_mc8_c(uint8_t *dst /*align 8*/, uint8_t *src /*align 1*/, int stride, int h, int x, int y){
    const int A = (8-x)*(8-y);
    const int B = (  x)*(8-y);
    const int C = (8-x)*(  y);
    const int D = (  x)*(  y);
    int i;

    assert(x<8 && y<8 && x>=0 && y>=0);

    for(i = 0; i < h; i++)
    {
        dst[0] = avg2(dst[0], ((A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + 32 - 4) >> 6));
        dst[1] = avg2(dst[1], ((A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + 32 - 4) >> 6));
        dst[2] = avg2(dst[2], ((A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + 32 - 4) >> 6));
        dst[3] = avg2(dst[3], ((A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + 32 - 4) >> 6));
        dst[4] = avg2(dst[4], ((A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5] + 32 - 4) >> 6));
        dst[5] = avg2(dst[5], ((A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6] + 32 - 4) >> 6));
        dst[6] = avg2(dst[6], ((A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7] + 32 - 4) >> 6));
        dst[7] = avg2(dst[7], ((A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] + 32 - 4) >> 6));
        dst += stride;
        src += stride;
    }
}
#if CONFIG_WMV3IMAGE_DECODER || CONFIG_VC1IMAGE_DECODER

static void sprite_h_c(uint8_t *dst, const uint8_t *src, int offset, int advance, int count)
{
    while (count--) {
        int a = src[(offset >> 16)    ];
        int b = src[(offset >> 16) + 1];
        *dst++ = a + ((b - a) * (offset&0xFFFF) >> 16);
        offset += advance;
    }
}
static av_always_inline void sprite_v_template(uint8_t *dst, const uint8_t *src1a, const uint8_t *src1b, int offset1,
                                               int two_sprites, const uint8_t *src2a, const uint8_t *src2b, int offset2,
                                               int alpha, int scaled, int width)
{
    int a1, b1, a2, b2;
    while (width--) {
        a1 = *src1a++;
        if (scaled) {
            b1 = *src1b++;
            a1 = a1 + ((b1 - a1) * offset1 >> 16);
        }
        if (two_sprites) {
            a2 = *src2a++;
            if (scaled > 1) {
                b2 = *src2b++;
                a2 = a2 + ((b2 - a2) * offset2 >> 16);
            }
            a1 = a1 + ((a2 - a1) * alpha >> 16);
        }
        *dst++ = a1;
    }
}
static void sprite_v_single_c(uint8_t *dst, const uint8_t *src1a, const uint8_t *src1b, int offset, int width)
{
    sprite_v_template(dst, src1a, src1b, offset, 0, NULL, NULL, 0, 0, 1, width);
}

static void sprite_v_double_noscale_c(uint8_t *dst, const uint8_t *src1a, const uint8_t *src2a, int alpha, int width)
{
    sprite_v_template(dst, src1a, NULL, 0, 1, src2a, NULL, 0, alpha, 0, width);
}

static void sprite_v_double_onescale_c(uint8_t *dst, const uint8_t *src1a, const uint8_t *src1b, int offset1,
                                       const uint8_t *src2a, int alpha, int width)
{
    sprite_v_template(dst, src1a, src1b, offset1, 1, src2a, NULL, 0, alpha, 1, width);
}

static void sprite_v_double_twoscale_c(uint8_t *dst, const uint8_t *src1a, const uint8_t *src1b, int offset1,
                                       const uint8_t *src2a, const uint8_t *src2b, int offset2,
                                       int alpha, int width)
{
    sprite_v_template(dst, src1a, src1b, offset1, 1, src2a, src2b, offset2, alpha, 2, width);
}

#endif /* CONFIG_WMV3IMAGE_DECODER || CONFIG_VC1IMAGE_DECODER */
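
/*
 * All four sprite_v entry points above are thin wrappers around
 * sprite_v_template: since the template is av_always_inline and the
 * "scaled" (0, 1 or 2 source lines interpolated) and two_sprites arguments
 * are compile-time constants, each wrapper collapses to a specialized
 * loop. offset1, offset2 and alpha are 16.16 fixed-point fractions.
 */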
av_cold void ff_vc1dsp_init(VC1DSPContext* dsp) {
    dsp->vc1_inv_trans_8x8 = vc1_inv_trans_8x8_c;
    dsp->vc1_inv_trans_4x8 = vc1_inv_trans_4x8_c;
    dsp->vc1_inv_trans_8x4 = vc1_inv_trans_8x4_c;
    dsp->vc1_inv_trans_4x4 = vc1_inv_trans_4x4_c;
    dsp->vc1_inv_trans_8x8_dc = vc1_inv_trans_8x8_dc_c;
    dsp->vc1_inv_trans_4x8_dc = vc1_inv_trans_4x8_dc_c;
    dsp->vc1_inv_trans_8x4_dc = vc1_inv_trans_8x4_dc_c;
    dsp->vc1_inv_trans_4x4_dc = vc1_inv_trans_4x4_dc_c;
    dsp->vc1_h_overlap = vc1_h_overlap_c;
    dsp->vc1_v_overlap = vc1_v_overlap_c;
    dsp->vc1_h_s_overlap = vc1_h_s_overlap_c;
    dsp->vc1_v_s_overlap = vc1_v_s_overlap_c;
    dsp->vc1_v_loop_filter4 = vc1_v_loop_filter4_c;
    dsp->vc1_h_loop_filter4 = vc1_h_loop_filter4_c;
    dsp->vc1_v_loop_filter8 = vc1_v_loop_filter8_c;
    dsp->vc1_h_loop_filter8 = vc1_h_loop_filter8_c;
    dsp->vc1_v_loop_filter16 = vc1_v_loop_filter16_c;
    dsp->vc1_h_loop_filter16 = vc1_h_loop_filter16_c;

    dsp->put_vc1_mspel_pixels_tab[ 0] = ff_put_pixels8x8_c;
    dsp->put_vc1_mspel_pixels_tab[ 1] = put_vc1_mspel_mc10_c;
    dsp->put_vc1_mspel_pixels_tab[ 2] = put_vc1_mspel_mc20_c;
    dsp->put_vc1_mspel_pixels_tab[ 3] = put_vc1_mspel_mc30_c;
    dsp->put_vc1_mspel_pixels_tab[ 4] = put_vc1_mspel_mc01_c;
    dsp->put_vc1_mspel_pixels_tab[ 5] = put_vc1_mspel_mc11_c;
    dsp->put_vc1_mspel_pixels_tab[ 6] = put_vc1_mspel_mc21_c;
    dsp->put_vc1_mspel_pixels_tab[ 7] = put_vc1_mspel_mc31_c;
    dsp->put_vc1_mspel_pixels_tab[ 8] = put_vc1_mspel_mc02_c;
    dsp->put_vc1_mspel_pixels_tab[ 9] = put_vc1_mspel_mc12_c;
    dsp->put_vc1_mspel_pixels_tab[10] = put_vc1_mspel_mc22_c;
    dsp->put_vc1_mspel_pixels_tab[11] = put_vc1_mspel_mc32_c;
    dsp->put_vc1_mspel_pixels_tab[12] = put_vc1_mspel_mc03_c;
    dsp->put_vc1_mspel_pixels_tab[13] = put_vc1_mspel_mc13_c;
    dsp->put_vc1_mspel_pixels_tab[14] = put_vc1_mspel_mc23_c;
    dsp->put_vc1_mspel_pixels_tab[15] = put_vc1_mspel_mc33_c;

    dsp->avg_vc1_mspel_pixels_tab[ 0] = ff_avg_pixels8x8_c;
    dsp->avg_vc1_mspel_pixels_tab[ 1] = avg_vc1_mspel_mc10_c;
    dsp->avg_vc1_mspel_pixels_tab[ 2] = avg_vc1_mspel_mc20_c;
    dsp->avg_vc1_mspel_pixels_tab[ 3] = avg_vc1_mspel_mc30_c;
    dsp->avg_vc1_mspel_pixels_tab[ 4] = avg_vc1_mspel_mc01_c;
    dsp->avg_vc1_mspel_pixels_tab[ 5] = avg_vc1_mspel_mc11_c;
    dsp->avg_vc1_mspel_pixels_tab[ 6] = avg_vc1_mspel_mc21_c;
    dsp->avg_vc1_mspel_pixels_tab[ 7] = avg_vc1_mspel_mc31_c;
    dsp->avg_vc1_mspel_pixels_tab[ 8] = avg_vc1_mspel_mc02_c;
    dsp->avg_vc1_mspel_pixels_tab[ 9] = avg_vc1_mspel_mc12_c;
    dsp->avg_vc1_mspel_pixels_tab[10] = avg_vc1_mspel_mc22_c;
    dsp->avg_vc1_mspel_pixels_tab[11] = avg_vc1_mspel_mc32_c;
    dsp->avg_vc1_mspel_pixels_tab[12] = avg_vc1_mspel_mc03_c;
    dsp->avg_vc1_mspel_pixels_tab[13] = avg_vc1_mspel_mc13_c;
    dsp->avg_vc1_mspel_pixels_tab[14] = avg_vc1_mspel_mc23_c;
    dsp->avg_vc1_mspel_pixels_tab[15] = avg_vc1_mspel_mc33_c;

    dsp->put_no_rnd_vc1_chroma_pixels_tab[0] = put_no_rnd_vc1_chroma_mc8_c;
    dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = avg_no_rnd_vc1_chroma_mc8_c;
    dsp->put_no_rnd_vc1_chroma_pixels_tab[1] = put_no_rnd_vc1_chroma_mc4_c;

#if CONFIG_WMV3IMAGE_DECODER || CONFIG_VC1IMAGE_DECODER
    dsp->sprite_h = sprite_h_c;
    dsp->sprite_v_single = sprite_v_single_c;
    dsp->sprite_v_double_noscale = sprite_v_double_noscale_c;
    dsp->sprite_v_double_onescale = sprite_v_double_onescale_c;
    dsp->sprite_v_double_twoscale = sprite_v_double_twoscale_c;
#endif

    /* Per-architecture optimized versions override the C pointers set above. */
    if (HAVE_ALTIVEC)
        ff_vc1dsp_init_altivec(dsp);
    if (ARCH_X86)
        ff_vc1dsp_init_x86(dsp);
}