/*
 * DSP functions for Indeo Video Interactive codecs (Indeo4 and Indeo5)
 *
 * Copyright (c) 2009-2011 Maxim Poliakovski
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * DSP functions (inverse transforms, motion compensation, wavelet recompositions)
 * for Indeo Video Interactive codecs.
 */
30 #include "ivi_common.h"

void ff_ivi_recompose53(const IVIPlaneDesc *plane, uint8_t *dst,
                        const int dst_pitch)
{
    int         x, y, indx;
    int32_t     p0, p1, p2, p3, tmp0, tmp1, tmp2;
    int32_t     b0_1, b0_2, b1_1, b1_2, b1_3, b2_1, b2_2, b2_3, b2_4, b2_5, b2_6;
    int32_t     b3_1, b3_2, b3_3, b3_4, b3_5, b3_6, b3_7, b3_8, b3_9;
    int32_t     pitch, back_pitch;
    const short *b0_ptr, *b1_ptr, *b2_ptr, *b3_ptr;
    const int   num_bands = 4;
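
    /*
     * 5/3 wavelet recomposition: b0..b3 hold the LL, HL, LH and HH band
     * coefficients.  The loops below combine low-pass and high-pass
     * synthesis taps from all four bands and reconstruct a 2x2 block of
     * output pixels (p0..p3) per iteration.
     */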

    /* all bands should have the same pitch */
    pitch = plane->bands[0].pitch;

    /* pixels at the position "y-1" will be set to pixels at the "y" for the 1st iteration */
    back_pitch = 0;

    /* get pointers to the wavelet bands */
    b0_ptr = plane->bands[0].buf;
    b1_ptr = plane->bands[1].buf;
    b2_ptr = plane->bands[2].buf;
    b3_ptr = plane->bands[3].buf;

    for (y = 0; y < plane->height; y += 2) {
        /* load storage variables with values */
        b1_1 = b1_ptr[back_pitch];
        b1_2 = b1_ptr[0];
        b1_3 = b1_1 - b1_2*6 + b1_ptr[pitch];

        b2_2 = b2_ptr[0];     // b2[x,  y ]
        b2_3 = b2_2;          // b2[x+1,y ] = b2[x,y]
        b2_5 = b2_ptr[pitch]; // b2[x  ,y+1]
        b2_6 = b2_5;          // b2[x+1,y+1] = b2[x,y+1]

        b3_2 = b3_ptr[back_pitch]; // b3[x  ,y-1]
        b3_3 = b3_2;               // b3[x+1,y-1] = b3[x  ,y-1]
        b3_5 = b3_ptr[0];          // b3[x  ,y  ]
        b3_6 = b3_5;               // b3[x+1,y  ] = b3[x  ,y  ]
        b3_8 = b3_2 - b3_5*6 + b3_ptr[pitch];
        b3_9 = b3_8;

        for (x = 0, indx = 0; x < plane->width; x += 2, indx++) {
            /* some values calculated in the previous iterations can */
            /* be reused in the next ones, so do appropriate copying */
            b2_1 = b2_2; // b2[x-1,y  ] = b2[x,  y  ]
            b2_2 = b2_3; // b2[x  ,y  ] = b2[x+1,y  ]
            b2_4 = b2_5; // b2[x-1,y+1] = b2[x  ,y+1]
            b2_5 = b2_6; // b2[x  ,y+1] = b2[x+1,y+1]
            b3_1 = b3_2; // b3[x-1,y-1] = b3[x  ,y-1]
            b3_2 = b3_3; // b3[x  ,y-1] = b3[x+1,y-1]
            b3_4 = b3_5; // b3[x-1,y  ] = b3[x  ,y  ]
            b3_5 = b3_6; // b3[x  ,y  ] = b3[x+1,y  ]
            b3_7 = b3_8; // vert_HPF(x-1)
            b3_8 = b3_9; // vert_HPF(x  )

            p0 = p1 = p2 = p3 = 0;
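
            /*
             * p0..p3 accumulate the contributions of the four bands for the
             * output pixels at (x,y), (x+1,y), (x,y+1) and (x+1,y+1); the
             * shifts applied below keep all terms at a common fixed-point
             * scale, which is removed by the ">> 6" when the pixels are
             * written out.
             */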

            /* process the LL-band by applying LPF both vertically and horizontally */
            b0_1 = b0_ptr[indx+1];
            b0_2 = b0_ptr[pitch+indx+1];

            p2 = (tmp0 + tmp2) << 3;
            p3 = (tmp1 + tmp2 + b0_2) << 2;

            /* process the HL-band by applying HPF vertically and LPF horizontally */
            b1_2 = b1_ptr[indx+1];
            b1_1 = b1_ptr[back_pitch+indx+1];

            tmp2 = tmp1 - tmp0*6 + b1_3;
            b1_3 = b1_1 - b1_2*6 + b1_ptr[pitch+indx+1];

            p0 += (tmp0 + tmp1) << 3;
            p1 += (tmp0 + tmp1 + b1_1 + b1_2) << 2;
            p3 += (tmp2 + b1_3) << 1;

            /* process the LH-band by applying LPF vertically and HPF horizontally */
            b2_3 = b2_ptr[indx+1];
            b2_6 = b2_ptr[pitch+indx+1];

            tmp1 = b2_1 - b2_2*6 + b2_3;

            p2 += (tmp0 + b2_4 + b2_5) << 2;
            p3 += (tmp1 + b2_4 - b2_5*6 + b2_6) << 1;

            /* process the HH-band by applying HPF both vertically and horizontally */
            b3_6 = b3_ptr[indx+1];            // b3[x+1,y  ]
            b3_3 = b3_ptr[back_pitch+indx+1]; // b3[x+1,y-1]

            b3_9 = b3_3 - b3_6*6 + b3_ptr[pitch+indx+1];

            p0 += (tmp0 + tmp1) << 2;
            p1 += (tmp0 - tmp1*6 + tmp2) << 1;
            p2 += (b3_7 + b3_8) << 1;
            p3 +=  b3_7 - b3_8*6 + b3_9;

            /* output four pixels */
            dst[x]             = av_clip_uint8((p0 >> 6) + 128);
            dst[x+1]           = av_clip_uint8((p1 >> 6) + 128);
            dst[dst_pitch+x]   = av_clip_uint8((p2 >> 6) + 128);
            dst[dst_pitch+x+1] = av_clip_uint8((p3 >> 6) + 128);
        } // for x

        dst += dst_pitch << 1;
    } // for y
}
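
/*
 * Haar wavelet recomposition: each 2x2 block of output pixels is rebuilt
 * directly from one coefficient of each of the four bands.
 */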
void ff_ivi_recompose_haar(const IVIPlaneDesc *plane, uint8_t *dst,
                           const int dst_pitch)
{
    int         x, y, indx, b0, b1, b2, b3, p0, p1, p2, p3;
    const short *b0_ptr, *b1_ptr, *b2_ptr, *b3_ptr;
    int32_t     pitch;

    /* all bands should have the same pitch */
    pitch = plane->bands[0].pitch;

    /* get pointers to the wavelet bands */
    b0_ptr = plane->bands[0].buf;
    b1_ptr = plane->bands[1].buf;
    b2_ptr = plane->bands[2].buf;
    b3_ptr = plane->bands[3].buf;

    for (y = 0; y < plane->height; y += 2) {
        for (x = 0, indx = 0; x < plane->width; x += 2, indx++) {
            /* load coefficients */
            b0 = b0_ptr[indx]; //should be: b0 = (num_bands > 0) ? b0_ptr[indx] : 0;
            b1 = b1_ptr[indx]; //should be: b1 = (num_bands > 1) ? b1_ptr[indx] : 0;
            b2 = b2_ptr[indx]; //should be: b2 = (num_bands > 2) ? b2_ptr[indx] : 0;
            b3 = b3_ptr[indx]; //should be: b3 = (num_bands > 3) ? b3_ptr[indx] : 0;

            /* haar wavelet recomposition */
            p0 = (b0 + b1 + b2 + b3 + 2) >> 2;
            p1 = (b0 + b1 - b2 - b3 + 2) >> 2;
            p2 = (b0 - b1 + b2 - b3 + 2) >> 2;
            p3 = (b0 - b1 - b2 + b3 + 2) >> 2;
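
            /*
             * The four sums above form the 2x2 inverse Haar butterfly:
             * every output pixel is a +/- combination of the four band
             * coefficients, rounded (+2) and scaled down (>> 2).
             */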

            /* bias, convert and output four pixels */
            dst[x]                 = av_clip_uint8(p0 + 128);
            dst[x + 1]             = av_clip_uint8(p1 + 128);
            dst[dst_pitch + x]     = av_clip_uint8(p2 + 128);
            dst[dst_pitch + x + 1] = av_clip_uint8(p3 + 128);
        } // for x

        dst += dst_pitch << 1;
    } // for y
}

/** butterfly operation for the inverse Haar transform */
#define IVI_HAAR_BFLY(s1, s2, o1, o2, t) \
    t  = (s1 - s2) >> 1;\
    o1 = (s1 + s2) >> 1;\
    o2 = t;
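
/*
 * The half-difference goes through the temporary t so that the butterfly
 * also works when the output operands alias the inputs, as in the in-place
 * calls made by INV_HAAR8 below.
 */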

/** inverse 8-point Haar transform */
#define INV_HAAR8(s1, s5, s3, s7, s2, s4, s6, s8,\
                  d1, d2, d3, d4, d5, d6, d7, d8,\
                  t0, t1, t2, t3, t4, t5, t6, t7, t8) {\
    t1 = s1 << 1; t5 = s5 << 1;\
    IVI_HAAR_BFLY(t1, t5, t1, t5, t0); IVI_HAAR_BFLY(t1, s3, t1, t3, t0);\
    IVI_HAAR_BFLY(t5, s7, t5, t7, t0); IVI_HAAR_BFLY(t1, s2, t1, t2, t0);\
    IVI_HAAR_BFLY(t3, s4, t3, t4, t0); IVI_HAAR_BFLY(t5, s6, t5, t6, t0);\
    IVI_HAAR_BFLY(t7, s8, t7, t8, t0);\
    d1 = COMPENSATE(t1);\
    d2 = COMPENSATE(t2);\
    d3 = COMPENSATE(t3);\
    d4 = COMPENSATE(t4);\
    d5 = COMPENSATE(t5);\
    d6 = COMPENSATE(t6);\
    d7 = COMPENSATE(t7);\
    d8 = COMPENSATE(t8); }
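
/*
 * The first two inputs are doubled and then fed through a three-level tree
 * of butterflies that fans the values out to all eight outputs.  COMPENSATE
 * is (re)defined by each caller to select the final rounding/scaling step.
 */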

/** inverse 4-point Haar transform */
#define INV_HAAR4(s1, s3, s5, s7) {\
    HAAR_BFLY(s1, s5); HAAR_BFLY(s1, s3); HAAR_BFLY(s5, s7);\
    s1 = COMPENSATE(s1);\
    s3 = COMPENSATE(s3);\
    s5 = COMPENSATE(s5);\
    s7 = COMPENSATE(s7); }

void ff_ivi_inverse_haar_8x8(const int32_t *in, int16_t *out, uint32_t pitch,
                             const uint8_t *flags)
{
    int i, shift, sp1, sp2, sp3, sp4;
    const int32_t *src;
    int32_t *dst;
    int t0, t1, t2, t3, t4, t5, t6, t7, t8;

    /* apply the InvHaar8 to all columns */
#define COMPENSATE(x) (x)
    for (i = 0; i < 8; i++) {
        if (flags[i]) {
            sp1 = src[ 0] << shift;
            sp2 = src[ 8] << shift;
            sp3 = src[16] << shift;
            sp4 = src[24] << shift;
            INV_HAAR8(    sp1,     sp2,     sp3,     sp4,
                      src[32], src[40], src[48], src[56],
                      dst[ 0], dst[ 8], dst[16], dst[24],
                      dst[32], dst[40], dst[48], dst[56],
                      t0, t1, t2, t3, t4, t5, t6, t7, t8);
        } else
            dst[ 0] = dst[ 8] = dst[16] = dst[24] =
            dst[32] = dst[40] = dst[48] = dst[56] = 0;
    }
#undef COMPENSATE
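
    /*
     * Row pass: rows whose eight intermediate values are all zero are not
     * transformed at all, they are just cleared with memset().
     */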
    /* apply the InvHaar8 to all rows */
#define COMPENSATE(x) (x)
    for (i = 0; i < 8; i++) {
        if ( !src[0] && !src[1] && !src[2] && !src[3]
            && !src[4] && !src[5] && !src[6] && !src[7]) {
            memset(out, 0, 8 * sizeof(out[0]));
        } else {
            INV_HAAR8(src[0], src[1], src[2], src[3],
                      src[4], src[5], src[6], src[7],
                      out[0], out[1], out[2], out[3],
                      out[4], out[5], out[6], out[7],
                      t0, t1, t2, t3, t4, t5, t6, t7, t8);
        }
    }
#undef COMPENSATE
}
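
/*
 * DC-only case: the single DC coefficient is rescaled once and then
 * replicated over the whole blk_size x blk_size output block.
 */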
void ff_ivi_dc_haar_2d(const int32_t *in, int16_t *out, uint32_t pitch,
                       int blk_size)
{
    int     x, y;
    int16_t dc_coeff;

    dc_coeff = (*in + 0) >> 3;

    for (y = 0; y < blk_size; out += pitch, y++) {
        for (x = 0; x < blk_size; x++)
            out[x] = dc_coeff;
    }
}

/** butterfly operation for the inverse slant transform */
#define IVI_SLANT_BFLY(s1, s2, o1, o2, t) \
    t  = s1 - s2;\
    o1 = s1 + s2;\
    o2 = t;

/** This is a reflection a,b = 1/2, 5/4 for the inverse slant transform */
#define IVI_IREFLECT(s1, s2, o1, o2, t) \
    t  = ((s1 + s2*2 + 2) >> 2) + s1;\
    o2 = ((s1*2 - s2 + 2) >> 2) - s2;\
    o1 = t;

/** This is a reflection a,b = 1/2, 7/8 for the inverse slant transform */
#define IVI_SLANT_PART4(s1, s2, o1, o2, t) \
    t  = s2 + ((s1*4 - s2 + 4) >> 3);\
    o2 = s1 + ((-s1 - s2*4 + 4) >> 3);\
    o1 = t;
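
/*
 * As with the butterflies, the reflections compute their first output into
 * the temporary t so they can be applied in place inside the transform
 * macros below.
 */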

/** inverse slant8 transform */
#define IVI_INV_SLANT8(s1, s4, s8, s5, s2, s6, s3, s7,\
                       d1, d2, d3, d4, d5, d6, d7, d8,\
                       t0, t1, t2, t3, t4, t5, t6, t7, t8) {\
    IVI_SLANT_PART4(s4, s5, t4, t5, t0);\
    IVI_SLANT_BFLY(s1, t5, t1, t5, t0); IVI_SLANT_BFLY(s2, s6, t2, t6, t0);\
    IVI_SLANT_BFLY(s7, s3, t7, t3, t0); IVI_SLANT_BFLY(t4, s8, t4, t8, t0);\
    IVI_SLANT_BFLY(t1, t2, t1, t2, t0); IVI_IREFLECT (t4, t3, t4, t3, t0);\
    IVI_SLANT_BFLY(t5, t6, t5, t6, t0); IVI_IREFLECT (t8, t7, t8, t7, t0);\
    IVI_SLANT_BFLY(t1, t4, t1, t4, t0); IVI_SLANT_BFLY(t2, t3, t2, t3, t0);\
    IVI_SLANT_BFLY(t5, t8, t5, t8, t0); IVI_SLANT_BFLY(t6, t7, t6, t7, t0);\
    d1 = COMPENSATE(t1);\
    d2 = COMPENSATE(t2);\
    d3 = COMPENSATE(t3);\
    d4 = COMPENSATE(t4);\
    d5 = COMPENSATE(t5);\
    d6 = COMPENSATE(t6);\
    d7 = COMPENSATE(t7);\
    d8 = COMPENSATE(t8);}

/** inverse slant4 transform */
#define IVI_INV_SLANT4(s1, s4, s2, s3, d1, d2, d3, d4, t0, t1, t2, t3, t4) {\
    IVI_SLANT_BFLY(s1, s2, t1, t2, t0); IVI_IREFLECT (s4, s3, t4, t3, t0);\
    IVI_SLANT_BFLY(t1, t4, t1, t4, t0); IVI_SLANT_BFLY(t2, t3, t2, t3, t0);\
    d1 = COMPENSATE(t1);\
    d2 = COMPENSATE(t2);\
    d3 = COMPENSATE(t3);\
    d4 = COMPENSATE(t4);}
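
/*
 * Note that the sN parameters of IVI_INV_SLANT8/IVI_INV_SLANT4 are listed in
 * a permuted order: the callers always pass the coefficients in natural scan
 * order, and the parameter names describe where each one enters the
 * butterfly/reflection network.
 */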

void ff_ivi_inverse_slant_8x8(const int32_t *in, int16_t *out, uint32_t pitch, const uint8_t *flags)
{
    int i;
    const int32_t *src;
    int32_t *dst;
    int t0, t1, t2, t3, t4, t5, t6, t7, t8;

#define COMPENSATE(x) (x)
    for (i = 0; i < 8; i++) {
        if (flags[i]) {
            IVI_INV_SLANT8(src[0], src[8], src[16], src[24], src[32], src[40], src[48], src[56],
                           dst[0], dst[8], dst[16], dst[24], dst[32], dst[40], dst[48], dst[56],
                           t0, t1, t2, t3, t4, t5, t6, t7, t8);
        } else
            dst[0] = dst[8] = dst[16] = dst[24] = dst[32] = dst[40] = dst[48] = dst[56] = 0;
    }
#undef COMPENSATE
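
    /*
     * The column pass above runs with an identity COMPENSATE; the row pass
     * below redefines it as ((x + 1) >> 1) so that the final results are
     * rounded and scaled back to the output range.
     */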
#define COMPENSATE(x) ((x + 1)>>1)
    for (i = 0; i < 8; i++) {
        if (!src[0] && !src[1] && !src[2] && !src[3] && !src[4] && !src[5] && !src[6] && !src[7]) {
            memset(out, 0, 8*sizeof(out[0]));
        } else {
            IVI_INV_SLANT8(src[0], src[1], src[2], src[3], src[4], src[5], src[6], src[7],
                           out[0], out[1], out[2], out[3], out[4], out[5], out[6], out[7],
                           t0, t1, t2, t3, t4, t5, t6, t7, t8);
        }
    }
#undef COMPENSATE
}

void ff_ivi_inverse_slant_4x4(const int32_t *in, int16_t *out, uint32_t pitch, const uint8_t *flags)
{
    int i;
    const int32_t *src;
    int32_t *dst;
    int t0, t1, t2, t3, t4;
428 for (i
= 0; i
< 4; i
++) {
430 IVI_INV_SLANT4(src
[0], src
[4], src
[8], src
[12],
431 dst
[0], dst
[4], dst
[8], dst
[12],
434 dst
[0] = dst
[4] = dst
[8] = dst
[12] = 0;

#define COMPENSATE(x) ((x + 1)>>1)
    for (i = 0; i < 4; i++) {
        if (!src[0] && !src[1] && !src[2] && !src[3]) {
            out[0] = out[1] = out[2] = out[3] = 0;
        } else {
            IVI_INV_SLANT4(src[0], src[1], src[2], src[3],
                           out[0], out[1], out[2], out[3],
                           t0, t1, t2, t3, t4);
        }
    }
#undef COMPENSATE
}

void ff_ivi_dc_slant_2d(const int32_t *in, int16_t *out, uint32_t pitch, int blk_size)
{
    int     x, y;
    int16_t dc_coeff;

    dc_coeff = (*in + 1) >> 1;

    for (y = 0; y < blk_size; out += pitch, y++) {
        for (x = 0; x < blk_size; x++)
            out[x] = dc_coeff;
    }
}
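
/*
 * 1-D variants: ff_ivi_row_slant8 and ff_ivi_col_slant8 apply the 8-point
 * inverse slant to rows only or to columns only, and the dc_row/dc_col
 * versions handle the shortcut case where just the DC coefficient is coded.
 */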
void ff_ivi_row_slant8(const int32_t *in, int16_t *out, uint32_t pitch, const uint8_t *flags)
{
    int i;
    int t0, t1, t2, t3, t4, t5, t6, t7, t8;

#define COMPENSATE(x) ((x + 1)>>1)
    for (i = 0; i < 8; i++) {
        if (!in[0] && !in[1] && !in[2] && !in[3] && !in[4] && !in[5] && !in[6] && !in[7]) {
            memset(out, 0, 8*sizeof(out[0]));
        } else {
            IVI_INV_SLANT8( in[0],  in[1],  in[2],  in[3],  in[4],  in[5],  in[6],  in[7],
                           out[0], out[1], out[2], out[3], out[4], out[5], out[6], out[7],
                           t0, t1, t2, t3, t4, t5, t6, t7, t8);
        }
    }
#undef COMPENSATE
}

void ff_ivi_dc_row_slant(const int32_t *in, int16_t *out, uint32_t pitch, int blk_size)
{
    int     x, y;
    int16_t dc_coeff;

    dc_coeff = (*in + 1) >> 1;

    for (x = 0; x < blk_size; x++)
        out[x] = dc_coeff;

    out += pitch;

    for (y = 1; y < blk_size; out += pitch, y++) {
        for (x = 0; x < blk_size; x++)
            out[x] = 0;
    }
}

void ff_ivi_col_slant8(const int32_t *in, int16_t *out, uint32_t pitch, const uint8_t *flags)
{
    int i, row2, row4, row8;
    int t0, t1, t2, t3, t4, t5, t6, t7, t8;

    row2 = pitch << 1;
    row4 = pitch << 2;
    row8 = pitch << 3;

#define COMPENSATE(x) ((x + 1)>>1)
    for (i = 0; i < 8; i++) {
        if (flags[i]) {
            IVI_INV_SLANT8(in[0], in[8], in[16], in[24], in[32], in[40], in[48], in[56],
                           out[0], out[pitch], out[row2], out[row2 + pitch], out[row4],
                           out[row4 + pitch], out[row4 + row2], out[row8 - pitch],
                           t0, t1, t2, t3, t4, t5, t6, t7, t8);
        } else
            out[0] = out[pitch] = out[row2] = out[row2 + pitch] = out[row4] =
            out[row4 + pitch] = out[row4 + row2] = out[row8 - pitch] = 0;
    }
#undef COMPENSATE
}

void ff_ivi_dc_col_slant(const int32_t *in, int16_t *out, uint32_t pitch, int blk_size)
{
    int     x, y;
    int16_t dc_coeff;

    dc_coeff = (*in + 1) >> 1;

    for (y = 0; y < blk_size; out += pitch, y++) {
        out[0] = dc_coeff;
        for (x = 1; x < blk_size; x++)
            out[x] = 0;
    }
}

void ff_ivi_put_pixels_8x8(const int32_t *in, int16_t *out, uint32_t pitch,
                           const uint8_t *flags)
{
    int x, y;

    for (y = 0; y < 8; out += pitch, in += 8, y++)
        for (x = 0; x < 8; x++)
            out[x] = in[x];
}

void ff_ivi_put_dc_pixel_8x8(const int32_t *in, int16_t *out, uint32_t pitch,
                             int blk_size)
{
    int y;

    out[0] = in[0];
    memset(out + 1, 0, 7*sizeof(out[0]));
    out += pitch;

    for (y = 1; y < 8; out += pitch, y++)
        memset(out, 0, 8*sizeof(out[0]));
}

#define IVI_MC_TEMPLATE(size, suffix, OP) \
void ff_ivi_mc_ ## size ##x## size ## suffix (int16_t *buf, const int16_t *ref_buf, \
                                              uint32_t pitch, int mc_type) \
{ \
    int i, j; \
    const int16_t *wptr; \
\
    switch (mc_type) { \
    case 0: /* fullpel (no interpolation) */ \
        for (i = 0; i < size; i++, buf += pitch, ref_buf += pitch) { \
            for (j = 0; j < size; j++) {\
                OP(buf[j], ref_buf[j]); \
            } \
        } \
        break; \
    case 1: /* horizontal halfpel interpolation */ \
        for (i = 0; i < size; i++, buf += pitch, ref_buf += pitch) \
            for (j = 0; j < size; j++) \
                OP(buf[j], (ref_buf[j] + ref_buf[j+1]) >> 1); \
        break; \
    case 2: /* vertical halfpel interpolation */ \
        wptr = ref_buf + pitch; \
        for (i = 0; i < size; i++, buf += pitch, wptr += pitch, ref_buf += pitch) \
            for (j = 0; j < size; j++) \
                OP(buf[j], (ref_buf[j] + wptr[j]) >> 1); \
        break; \
    case 3: /* vertical and horizontal halfpel interpolation */ \
        wptr = ref_buf + pitch; \
        for (i = 0; i < size; i++, buf += pitch, wptr += pitch, ref_buf += pitch) \
            for (j = 0; j < size; j++) \
                OP(buf[j], (ref_buf[j] + ref_buf[j+1] + wptr[j] + wptr[j+1]) >> 2); \
        break; \
    } \
}
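
/*
 * The motion compensation template is instantiated below for 8x8 and 4x4
 * blocks: the "_no_delta" variants use OP_PUT and simply copy the (possibly
 * halfpel-interpolated) reference pixels, while the "_delta" variants use
 * OP_ADD and add them to the residual already stored in buf.
 */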

#define OP_PUT(a, b) (a) = (b)
#define OP_ADD(a, b) (a) += (b)

IVI_MC_TEMPLATE(8, _no_delta, OP_PUT)
IVI_MC_TEMPLATE(8, _delta,    OP_ADD)
IVI_MC_TEMPLATE(4, _no_delta, OP_PUT)
IVI_MC_TEMPLATE(4, _delta,    OP_ADD)