/*
 * VC-1 and WMV3 decoder - DSP functions
 * Copyright (c) 2006 Konstantin Shishkov
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
/**
 * @file
 * VC-1 and WMV3 decoder
 */

#include "dsputil.h"
/** Apply overlap transform to horizontal edge
*/
static void vc1_v_overlap_c(uint8_t* src, int stride)
{
    int i;
    int a, b, c, d;
    int d1, d2;
    int rnd = 1;
    for(i = 0; i < 8; i++) {
        a = src[-2*stride];
        b = src[-stride];
        c = src[0];
        d = src[stride];

        d1 = (a - d + 3 + rnd) >> 3;
        d2 = (a - d + b - c + 4 - rnd) >> 3;

        src[-2*stride] = a - d1;
        src[-stride] = b - d2;
        src[0] = c + d2;
        src[stride] = d + d1;
        src++;
        rnd = !rnd;
    }
}
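/* Note on the overlap smoothing above: each of the 8 columns filters the four
 * pixels straddling a horizontal block boundary (two above, two below), and
 * the rounding term alternates from column to column via rnd.  For a flat
 * edge (a == d, b == c) both correction terms d1 and d2 evaluate to 0, so
 * flat areas pass through unchanged. */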
/** Apply overlap transform to vertical edge
*/
static void vc1_h_overlap_c(uint8_t* src, int stride)
{
    int i;
    int a, b, c, d;
    int d1, d2;
    int rnd = 1;
    for(i = 0; i < 8; i++) {
        a = src[-2];
        b = src[-1];
        c = src[0];
        d = src[1];

        d1 = (a - d + 3 + rnd) >> 3;
        d2 = (a - d + b - c + 4 - rnd) >> 3;

        src[-2] = a - d1;
        src[-1] = b - d2;
        src[0] = c + d2;
        src[1] = d + d1;
        src += stride;
        rnd = !rnd;
    }
}
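/* vc1_h_overlap_c is the transposed counterpart of vc1_v_overlap_c: it reads
 * the four pixels straddling a vertical block boundary at offsets -2..1 and
 * advances by stride to move down one row per iteration. */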
/** Do inverse transform on 8x8 block
*/
static void vc1_inv_trans_8x8_c(DCTELEM block[64])
{
    int i;
    register int t1,t2,t3,t4,t5,t6,t7,t8;
    DCTELEM *src, *dst;

    src = block;
    dst = block;
    for(i = 0; i < 8; i++){
        t1 = 12 * (src[0] + src[4]) + 4;
        t2 = 12 * (src[0] - src[4]) + 4;
        t3 = 16 * src[2] + 6 * src[6];
        t4 = 6 * src[2] - 16 * src[6];

        t5 = t1 + t3;
        t6 = t2 + t4;
        t7 = t2 - t4;
        t8 = t1 - t3;

        t1 = 16 * src[1] + 15 * src[3] + 9 * src[5] + 4 * src[7];
        t2 = 15 * src[1] - 4 * src[3] - 16 * src[5] - 9 * src[7];
        t3 = 9 * src[1] - 16 * src[3] + 4 * src[5] + 15 * src[7];
        t4 = 4 * src[1] - 9 * src[3] + 15 * src[5] - 16 * src[7];

        dst[0] = (t5 + t1) >> 3;
        dst[1] = (t6 + t2) >> 3;
        dst[2] = (t7 + t3) >> 3;
        dst[3] = (t8 + t4) >> 3;
        dst[4] = (t8 - t4) >> 3;
        dst[5] = (t7 - t3) >> 3;
        dst[6] = (t6 - t2) >> 3;
        dst[7] = (t5 - t1) >> 3;

        src += 8;
        dst += 8;
    }

    src = block;
    dst = block;
    for(i = 0; i < 8; i++){
        t1 = 12 * (src[ 0] + src[32]) + 64;
        t2 = 12 * (src[ 0] - src[32]) + 64;
        t3 = 16 * src[16] + 6 * src[48];
        t4 = 6 * src[16] - 16 * src[48];

        t5 = t1 + t3;
        t6 = t2 + t4;
        t7 = t2 - t4;
        t8 = t1 - t3;

        t1 = 16 * src[ 8] + 15 * src[24] + 9 * src[40] + 4 * src[56];
        t2 = 15 * src[ 8] - 4 * src[24] - 16 * src[40] - 9 * src[56];
        t3 = 9 * src[ 8] - 16 * src[24] + 4 * src[40] + 15 * src[56];
        t4 = 4 * src[ 8] - 9 * src[24] + 15 * src[40] - 16 * src[56];

        dst[ 0] = (t5 + t1) >> 7;
        dst[ 8] = (t6 + t2) >> 7;
        dst[16] = (t7 + t3) >> 7;
        dst[24] = (t8 + t4) >> 7;
        dst[32] = (t8 - t4 + 1) >> 7;
        dst[40] = (t7 - t3 + 1) >> 7;
        dst[48] = (t6 - t2 + 1) >> 7;
        dst[56] = (t5 - t1 + 1) >> 7;

        src++;
        dst++;
    }
}
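/* The 8x8 inverse transform above works in two in-place passes over the same
 * coefficient block: a row pass (adjacent reads, +4 bias, >>3) followed by a
 * column pass (reads 8 elements apart, +64 bias, >>7).  In the column pass
 * the last four outputs of each column take an extra +1 before the shift,
 * i.e. the "even minus odd" half of the butterfly is rounded asymmetrically. */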
/** Do inverse transform on 8x4 part of block
*/
static void vc1_inv_trans_8x4_c(uint8_t *dest, int linesize, DCTELEM *block)
{
    int i;
    register int t1,t2,t3,t4,t5,t6,t7,t8;
    DCTELEM *src, *dst;
    const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;

    src = block;
    dst = block;
    for(i = 0; i < 4; i++){
        t1 = 12 * (src[0] + src[4]) + 4;
        t2 = 12 * (src[0] - src[4]) + 4;
        t3 = 16 * src[2] + 6 * src[6];
        t4 = 6 * src[2] - 16 * src[6];

        t5 = t1 + t3;
        t6 = t2 + t4;
        t7 = t2 - t4;
        t8 = t1 - t3;

        t1 = 16 * src[1] + 15 * src[3] + 9 * src[5] + 4 * src[7];
        t2 = 15 * src[1] - 4 * src[3] - 16 * src[5] - 9 * src[7];
        t3 = 9 * src[1] - 16 * src[3] + 4 * src[5] + 15 * src[7];
        t4 = 4 * src[1] - 9 * src[3] + 15 * src[5] - 16 * src[7];

        dst[0] = (t5 + t1) >> 3;
        dst[1] = (t6 + t2) >> 3;
        dst[2] = (t7 + t3) >> 3;
        dst[3] = (t8 + t4) >> 3;
        dst[4] = (t8 - t4) >> 3;
        dst[5] = (t7 - t3) >> 3;
        dst[6] = (t6 - t2) >> 3;
        dst[7] = (t5 - t1) >> 3;

        src += 8;
        dst += 8;
    }

    src = block;
    for(i = 0; i < 8; i++){
        t1 = 17 * (src[ 0] + src[16]) + 64;
        t2 = 17 * (src[ 0] - src[16]) + 64;
        t3 = 22 * src[ 8] + 10 * src[24];
        t4 = 22 * src[24] - 10 * src[ 8];

        dest[0*linesize] = cm[dest[0*linesize] + ((t1 + t3) >> 7)];
        dest[1*linesize] = cm[dest[1*linesize] + ((t2 - t4) >> 7)];
        dest[2*linesize] = cm[dest[2*linesize] + ((t2 + t4) >> 7)];
        dest[3*linesize] = cm[dest[3*linesize] + ((t1 - t3) >> 7)];

        src++;
        dest++;
    }
}
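/* vc1_inv_trans_8x4_c: the first loop runs the 8-point transform over the 4
 * coefficient rows in place; the second loop runs the 4-point transform
 * ({17, 22, 10} basis) down each of the 8 columns and adds the result to the
 * destination pixels, clamping through the ff_cropTbl lookup. */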
/** Do inverse transform on 4x8 parts of block
*/
static void vc1_inv_trans_4x8_c(uint8_t *dest, int linesize, DCTELEM *block)
{
    int i;
    register int t1,t2,t3,t4,t5,t6,t7,t8;
    DCTELEM *src, *dst;
    const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;

    src = block;
    dst = block;
    for(i = 0; i < 8; i++){
        t1 = 17 * (src[0] + src[2]) + 4;
        t2 = 17 * (src[0] - src[2]) + 4;
        t3 = 22 * src[1] + 10 * src[3];
        t4 = 22 * src[3] - 10 * src[1];

        dst[0] = (t1 + t3) >> 3;
        dst[1] = (t2 - t4) >> 3;
        dst[2] = (t2 + t4) >> 3;
        dst[3] = (t1 - t3) >> 3;

        src += 8;
        dst += 8;
    }

    src = block;
    for(i = 0; i < 4; i++){
        t1 = 12 * (src[ 0] + src[32]) + 64;
        t2 = 12 * (src[ 0] - src[32]) + 64;
        t3 = 16 * src[16] + 6 * src[48];
        t4 = 6 * src[16] - 16 * src[48];

        t5 = t1 + t3;
        t6 = t2 + t4;
        t7 = t2 - t4;
        t8 = t1 - t3;

        t1 = 16 * src[ 8] + 15 * src[24] + 9 * src[40] + 4 * src[56];
        t2 = 15 * src[ 8] - 4 * src[24] - 16 * src[40] - 9 * src[56];
        t3 = 9 * src[ 8] - 16 * src[24] + 4 * src[40] + 15 * src[56];
        t4 = 4 * src[ 8] - 9 * src[24] + 15 * src[40] - 16 * src[56];

        dest[0*linesize] = cm[dest[0*linesize] + ((t5 + t1) >> 7)];
        dest[1*linesize] = cm[dest[1*linesize] + ((t6 + t2) >> 7)];
        dest[2*linesize] = cm[dest[2*linesize] + ((t7 + t3) >> 7)];
        dest[3*linesize] = cm[dest[3*linesize] + ((t8 + t4) >> 7)];
        dest[4*linesize] = cm[dest[4*linesize] + ((t8 - t4 + 1) >> 7)];
        dest[5*linesize] = cm[dest[5*linesize] + ((t7 - t3 + 1) >> 7)];
        dest[6*linesize] = cm[dest[6*linesize] + ((t6 - t2 + 1) >> 7)];
        dest[7*linesize] = cm[dest[7*linesize] + ((t5 - t1 + 1) >> 7)];

        src++;
        dest++;
    }
}
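/* vc1_inv_trans_4x8_c mirrors the 8x4 case: a 4-point row pass done in place,
 * then the 8-point column pass whose results are added to dest with the same
 * ff_cropTbl clamp and the +1 asymmetric rounding on rows 4..7. */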
/** Do inverse transform on 4x4 part of block
*/
static void vc1_inv_trans_4x4_c(uint8_t *dest, int linesize, DCTELEM *block)
{
    int i;
    register int t1,t2,t3,t4;
    DCTELEM *src, *dst;
    const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;

    src = block;
    dst = block;
    for(i = 0; i < 4; i++){
        t1 = 17 * (src[0] + src[2]) + 4;
        t2 = 17 * (src[0] - src[2]) + 4;
        t3 = 22 * src[1] + 10 * src[3];
        t4 = 22 * src[3] - 10 * src[1];

        dst[0] = (t1 + t3) >> 3;
        dst[1] = (t2 - t4) >> 3;
        dst[2] = (t2 + t4) >> 3;
        dst[3] = (t1 - t3) >> 3;

        src += 8;
        dst += 8;
    }

    src = block;
    for(i = 0; i < 4; i++){
        t1 = 17 * (src[ 0] + src[16]) + 64;
        t2 = 17 * (src[ 0] - src[16]) + 64;
        t3 = 22 * src[ 8] + 10 * src[24];
        t4 = 22 * src[24] - 10 * src[ 8];

        dest[0*linesize] = cm[dest[0*linesize] + ((t1 + t3) >> 7)];
        dest[1*linesize] = cm[dest[1*linesize] + ((t2 - t4) >> 7)];
        dest[2*linesize] = cm[dest[2*linesize] + ((t2 + t4) >> 7)];
        dest[3*linesize] = cm[dest[3*linesize] + ((t1 - t3) >> 7)];

        src++;
        dest++;
    }
}
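/* vc1_inv_trans_4x4_c uses the 4-point basis in both directions: +4 bias and
 * >>3 for the row pass, +64 bias and >>7 for the column pass that accumulates
 * into dest. */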
/* motion compensation functions */
/** Filter in case of 2 filters */
#define VC1_MSPEL_FILTER_16B(DIR, TYPE) \
static av_always_inline int vc1_mspel_ ## DIR ## _filter_16bits(const TYPE *src, int stride, int mode) \
{ \
    switch(mode){ \
    case 0: /* no shift - should not occur */ \
        return 0; \
    case 1: /* 1/4 shift */ \
        return -4*src[-stride] + 53*src[0] + 18*src[stride] - 3*src[stride*2]; \
    case 2: /* 1/2 shift */ \
        return -src[-stride] + 9*src[0] + 9*src[stride] - src[stride*2]; \
    case 3: /* 3/4 shift */ \
        return -3*src[-stride] + 18*src[0] + 53*src[stride] - 4*src[stride*2]; \
    } \
    return 0; /* should not occur */ \
}

VC1_MSPEL_FILTER_16B(ver, uint8_t);
VC1_MSPEL_FILTER_16B(hor, int16_t);
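/* The macro above expands twice: vc1_mspel_ver_filter_16bits() reads 8-bit
 * pixels a whole line apart (vertical taps), while vc1_mspel_hor_filter_16bits()
 * reads 16-bit intermediates one element apart (horizontal taps).  Both return
 * the unscaled 4-tap sum; vc1_mspel_mc() below applies rounding and shifting. */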
/** Filter used to interpolate fractional pel values
 */
static av_always_inline int vc1_mspel_filter(const uint8_t *src, int stride, int mode, int r)
{
    switch(mode){
    case 0: // no shift
        return src[0];
    case 1: // 1/4 shift
        return (-4*src[-stride] + 53*src[0] + 18*src[stride] - 3*src[stride*2] + 32 - r) >> 6;
    case 2: // 1/2 shift
        return (-src[-stride] + 9*src[0] + 9*src[stride] - src[stride*2] + 8 - r) >> 4;
    case 3: // 3/4 shift
        return (-3*src[-stride] + 18*src[0] + 53*src[stride] - 4*src[stride*2] + 32 - r) >> 6;
    }
    return 0; //should not occur
}
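/* Unlike the 16-bit helpers, vc1_mspel_filter() normalizes in one step: the
 * quarter-pel taps {-4, 53, 18, -3} sum to 64 (hence >>6) and the half-pel
 * taps {-1, 9, 9, -1} sum to 16 (hence >>4), with the rounding control r
 * folded into the bias before the shift. */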
/** Function used to do motion compensation with bicubic interpolation
 */
static void vc1_mspel_mc(uint8_t *dst, const uint8_t *src, int stride, int hmode, int vmode, int rnd)
{
    int i, j;

    if (vmode) { /* Vertical filter to apply */
        int r;

        if (hmode) { /* Horizontal filter also needed: run the vertical pass into tmp first */
            static const int shift_value[] = { 0, 5, 1, 5 };
            int shift = (shift_value[hmode]+shift_value[vmode])>>1;
            int16_t tmp[11*8], *tptr = tmp;

            r = (1<<(shift-1)) + rnd-1;

            src -= 1;
            for(j = 0; j < 8; j++) {
                for(i = 0; i < 11; i++)
                    tptr[i] = (vc1_mspel_ver_filter_16bits(src + i, stride, vmode)+r)>>shift;
                src += stride;
                tptr += 11;
            }

            r = 64-rnd;
            tptr = tmp+1;
            for(j = 0; j < 8; j++) {
                for(i = 0; i < 8; i++)
                    dst[i] = av_clip_uint8((vc1_mspel_hor_filter_16bits(tptr + i, 1, hmode)+r)>>7);
                dst += stride;
                tptr += 11;
            }

            return;
        }
        else { /* No horizontal filter, output 8 lines to dst */
            r = 1-rnd;

            for(j = 0; j < 8; j++) {
                for(i = 0; i < 8; i++)
                    dst[i] = av_clip_uint8(vc1_mspel_filter(src + i, stride, vmode, r));
                src += stride;
                dst += stride;
            }

            return;
        }
    }

    /* Horizontal mode with no vertical mode */
    for(j = 0; j < 8; j++) {
        for(i = 0; i < 8; i++)
            dst[i] = av_clip_uint8(vc1_mspel_filter(src + i, 1, hmode, rnd));
        dst += stride;
        src += stride;
    }
}
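/* Layout of the intermediate buffer in vc1_mspel_mc(): the vertical pass
 * writes 8 rows of 11 16-bit samples because the following horizontal 4-tap
 * pass needs one extra column to the left and two to the right of the 8
 * output columns (hence "src -= 1" before the first pass and "tptr = tmp+1"
 * before the second). */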
/* pixel functions - really are entry points to vc1_mspel_mc */

/* this one is defined in dsputil.c */
void ff_put_vc1_mspel_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int rnd);
#define PUT_VC1_MSPEL(a, b)\
static void put_vc1_mspel_mc ## a ## b ##_c(uint8_t *dst, const uint8_t *src, int stride, int rnd) { \
     vc1_mspel_mc(dst, src, stride, a, b, rnd); \
}

PUT_VC1_MSPEL(1, 0)
PUT_VC1_MSPEL(2, 0)
PUT_VC1_MSPEL(3, 0)

PUT_VC1_MSPEL(0, 1)
PUT_VC1_MSPEL(1, 1)
PUT_VC1_MSPEL(2, 1)
PUT_VC1_MSPEL(3, 1)

PUT_VC1_MSPEL(0, 2)
PUT_VC1_MSPEL(1, 2)
PUT_VC1_MSPEL(2, 2)
PUT_VC1_MSPEL(3, 2)

PUT_VC1_MSPEL(0, 3)
PUT_VC1_MSPEL(1, 3)
PUT_VC1_MSPEL(2, 3)
PUT_VC1_MSPEL(3, 3)
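/* PUT_VC1_MSPEL(a, b) generates put_vc1_mspel_mc<a><b>_c, where <a> is the
 * horizontal quarter-pel mode and <b> the vertical one; the table below is
 * filled so that entry (a + 4*b) holds the matching function. */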
void ff_vc1dsp_init(DSPContext* dsp, AVCodecContext *avctx) {
    dsp->vc1_inv_trans_8x8 = vc1_inv_trans_8x8_c;
    dsp->vc1_inv_trans_4x8 = vc1_inv_trans_4x8_c;
    dsp->vc1_inv_trans_8x4 = vc1_inv_trans_8x4_c;
    dsp->vc1_inv_trans_4x4 = vc1_inv_trans_4x4_c;
    dsp->vc1_h_overlap = vc1_h_overlap_c;
    dsp->vc1_v_overlap = vc1_v_overlap_c;

    dsp->put_vc1_mspel_pixels_tab[ 0] = ff_put_vc1_mspel_mc00_c;
    dsp->put_vc1_mspel_pixels_tab[ 1] = put_vc1_mspel_mc10_c;
    dsp->put_vc1_mspel_pixels_tab[ 2] = put_vc1_mspel_mc20_c;
    dsp->put_vc1_mspel_pixels_tab[ 3] = put_vc1_mspel_mc30_c;
    dsp->put_vc1_mspel_pixels_tab[ 4] = put_vc1_mspel_mc01_c;
    dsp->put_vc1_mspel_pixels_tab[ 5] = put_vc1_mspel_mc11_c;
    dsp->put_vc1_mspel_pixels_tab[ 6] = put_vc1_mspel_mc21_c;
    dsp->put_vc1_mspel_pixels_tab[ 7] = put_vc1_mspel_mc31_c;
    dsp->put_vc1_mspel_pixels_tab[ 8] = put_vc1_mspel_mc02_c;
    dsp->put_vc1_mspel_pixels_tab[ 9] = put_vc1_mspel_mc12_c;
    dsp->put_vc1_mspel_pixels_tab[10] = put_vc1_mspel_mc22_c;
    dsp->put_vc1_mspel_pixels_tab[11] = put_vc1_mspel_mc32_c;
    dsp->put_vc1_mspel_pixels_tab[12] = put_vc1_mspel_mc03_c;
    dsp->put_vc1_mspel_pixels_tab[13] = put_vc1_mspel_mc13_c;
    dsp->put_vc1_mspel_pixels_tab[14] = put_vc1_mspel_mc23_c;
    dsp->put_vc1_mspel_pixels_tab[15] = put_vc1_mspel_mc33_c;
}
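/* Sketch (not part of the original file): one way a caller could dispatch
 * through the tables installed by ff_vc1dsp_init().  The helper below is
 * hypothetical and only illustrates the (hmode + 4*vmode) indexing implied
 * by the table order above; it is kept under #if 0 so it is never built. */
#if 0
static void example_mspel_dispatch(DSPContext *dsp, uint8_t *dst,
                                   uint8_t *src, int stride,
                                   int hmode, int vmode, int rnd)
{
    /* hmode/vmode are quarter-pel positions (0..3); index 0 is the plain
       copy handled by ff_put_vc1_mspel_mc00_c. */
    dsp->put_vc1_mspel_pixels_tab[hmode + 4*vmode](dst, src, stride, rnd);
}
#endif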