/*
 * Copyright (C) 2004 the ffmpeg project
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
/*
 * Standard C DSP-oriented functions cribbed from the original VP3
 * source code.
 */
/*
 * Bias added to the column-pass results before they are shifted right by 4
 * (8 is half of 1 << 4, so the shift rounds instead of truncating).
 */
#define IdctAdjustBeforeShift 8
/*
 * Inverse DCT over an 8x8 coefficient block, done as a row pass (ip += 8 per
 * iteration) followed by a column pass (ip++ per iteration, elements 8 apart).
 *
 * dst    - output pixels, addressed as dst[k*stride] in the column pass
 * stride - distance between consecutive output rows of dst
 * input  - coefficient block; presumably what ip walks (the assignment is
 *          not visible here)
 * type   - output mode; the wrappers in this file pass 0, 1 and 2
 *
 * NOTE(review): this listing has line numbers fused into the text and is
 * missing statements: t1, t2, i and ip are never declared, the shifts that
 * follow each product are absent, and so are the branches on type and all
 * closing braces. Restore the function from a clean copy before relying on
 * the notes below.
 */
39 static always_inline
void idct(uint8_t *dst
, int stride
, int16_t *input
, int type
)
/* Lookup table applied to values stored to dst in the column pass;
 * cropTbl and MAX_NEG_CROP are defined elsewhere. */
42 uint8_t *cm
= cropTbl
+ MAX_NEG_CROP
;
/* Intermediate terms used by both the row pass and the column pass. */
44 int A_
, B_
, C_
, D_
, _Ad
, _Bd
, _Cd
, _Dd
, E_
, F_
, G_
, H_
;
45 int _Ed
, _Gd
, _Add
, _Bdd
, _Fd
, _Hd
;
50 /* Inverse DCT on the rows now */
51 for (i
= 0; i
< 8; i
++) {
52 /* Check for non-zero values */
53 if ( ip
[0] | ip
[1] | ip
[2] | ip
[3] | ip
[4] | ip
[5] | ip
[6] | ip
[7] ) {
/* Products of the odd-indexed coefficients (1, 7, 3, 5) with their
 * constants; t1/t2 are overwritten by each following pair. */
54 t1
= (int32_t)(xC1S7
* ip
[1]);
55 t2
= (int32_t)(xC7S1
* ip
[7]);
60 t1
= (int32_t)(xC7S1
* ip
[1]);
61 t2
= (int32_t)(xC1S7
* ip
[7]);
66 t1
= (int32_t)(xC3S5
* ip
[3]);
67 t2
= (int32_t)(xC5S3
* ip
[5]);
72 t1
= (int32_t)(xC3S5
* ip
[5]);
73 t2
= (int32_t)(xC5S3
* ip
[3]);
/* The differences A_ - C_ and B_ - D_ are each scaled by xC4S4. */
79 t1
= (int32_t)(xC4S4
* (A_
- C_
));
83 t1
= (int32_t)(xC4S4
* (B_
- D_
));
/* Even-indexed coefficients: sum and difference of 0 and 4 through xC4S4,
 * then 2 and 6 with xC2S6 / xC6S2. */
91 t1
= (int32_t)(xC4S4
* (ip
[0] + ip
[4]));
95 t1
= (int32_t)(xC4S4
* (ip
[0] - ip
[4]));
99 t1
= (int32_t)(xC2S6
* ip
[2]);
100 t2
= (int32_t)(xC6S2
* ip
[6]);
105 t1
= (int32_t)(xC6S2
* ip
[2]);
106 t2
= (int32_t)(xC2S6
* ip
[6]);
121 /* Final sequence of operations over-write original inputs. */
136 ip
+= 8; /* next row */
/* Column pass: the same products as the row pass, with the coefficients of
 * one column sitting 8 elements apart. The check below leaves out ip[0*8]. */
141 for ( i
= 0; i
< 8; i
++) {
142 /* Check for non-zero values (bitwise or faster than ||) */
143 if ( ip
[1 * 8] | ip
[2 * 8] | ip
[3 * 8] |
144 ip
[4 * 8] | ip
[5 * 8] | ip
[6 * 8] | ip
[7 * 8] ) {
146 t1
= (int32_t)(xC1S7
* ip
[1*8]);
147 t2
= (int32_t)(xC7S1
* ip
[7*8]);
152 t1
= (int32_t)(xC7S1
* ip
[1*8]);
153 t2
= (int32_t)(xC1S7
* ip
[7*8]);
158 t1
= (int32_t)(xC3S5
* ip
[3*8]);
159 t2
= (int32_t)(xC5S3
* ip
[5*8]);
164 t1
= (int32_t)(xC3S5
* ip
[5*8]);
165 t2
= (int32_t)(xC5S3
* ip
[3*8]);
171 t1
= (int32_t)(xC4S4
* (A_
- C_
));
175 t1
= (int32_t)(xC4S4
* (B_
- D_
));
183 t1
= (int32_t)(xC4S4
* (ip
[0*8] + ip
[4*8]));
187 t1
= (int32_t)(xC4S4
* (ip
[0*8] - ip
[4*8]));
191 t1
= (int32_t)(xC2S6
* ip
[2*8]);
192 t2
= (int32_t)(xC6S2
* ip
[6*8]);
197 t1
= (int32_t)(xC6S2
* ip
[2*8]);
198 t2
= (int32_t)(xC2S6
* ip
[6*8]);
/* Bias the four terms that feed every output, ahead of the >> 4 in the
 * stores below. */
219 _Gd
+= IdctAdjustBeforeShift
;
220 _Add
+= IdctAdjustBeforeShift
;
221 _Ed
+= IdctAdjustBeforeShift
;
222 _Fd
+= IdctAdjustBeforeShift
;
224 /* Final sequence of operations over-write original inputs. */
/* NOTE(review): three store variants follow; the conditionals choosing
 * between them are not visible here - presumably they switch on type. */
/* Variant 1: results are written back into the coefficient block. */
226 ip
[0*8] = (_Gd
+ _Cd
) >> 4;
227 ip
[7*8] = (_Gd
- _Cd
) >> 4;
229 ip
[1*8] = (_Add
+ _Hd
) >> 4;
230 ip
[2*8] = (_Add
- _Hd
) >> 4;
232 ip
[3*8] = (_Ed
+ _Dd
) >> 4;
233 ip
[4*8] = (_Ed
- _Dd
) >> 4;
235 ip
[5*8] = (_Fd
+ _Bdd
) >> 4;
236 ip
[6*8] = (_Fd
- _Bdd
) >> 4;
/* Variant 2: results are mapped through cm and written to dst. */
238 dst
[0*stride
] = cm
[(_Gd
+ _Cd
) >> 4];
239 dst
[7*stride
] = cm
[(_Gd
- _Cd
) >> 4];
241 dst
[1*stride
] = cm
[(_Add
+ _Hd
) >> 4];
242 dst
[2*stride
] = cm
[(_Add
- _Hd
) >> 4];
244 dst
[3*stride
] = cm
[(_Ed
+ _Dd
) >> 4];
245 dst
[4*stride
] = cm
[(_Ed
- _Dd
) >> 4];
247 dst
[5*stride
] = cm
[(_Fd
+ _Bdd
) >> 4];
248 dst
[6*stride
] = cm
[(_Fd
- _Bdd
) >> 4];
/* Variant 3: results are added to the value already in dst, and the sum is
 * mapped through cm. */
250 dst
[0*stride
] = cm
[dst
[0*stride
] + ((_Gd
+ _Cd
) >> 4)];
251 dst
[7*stride
] = cm
[dst
[7*stride
] + ((_Gd
- _Cd
) >> 4)];
253 dst
[1*stride
] = cm
[dst
[1*stride
] + ((_Add
+ _Hd
) >> 4)];
254 dst
[2*stride
] = cm
[dst
[2*stride
] + ((_Add
- _Hd
) >> 4)];
256 dst
[3*stride
] = cm
[dst
[3*stride
] + ((_Ed
+ _Dd
) >> 4)];
257 dst
[4*stride
] = cm
[dst
[4*stride
] + ((_Ed
- _Dd
) >> 4)];
259 dst
[5*stride
] = cm
[dst
[5*stride
] + ((_Fd
+ _Bdd
) >> 4)];
260 dst
[6*stride
] = cm
[dst
[6*stride
] + ((_Fd
- _Bdd
) >> 4)];
/* Paths that read only ip[0*8]: the bias is shifted up by 16 and the sum is
 * shifted down by 20 in a single step.
 * NOTE(review): presumably the branch taken when the non-zero check above
 * fails; the else is not visible, and only the last store of the first two
 * groups survives in this listing. */
272 ip
[7*8] = ((xC4S4
* ip
[0*8] + (IdctAdjustBeforeShift
<<16))>>20);
/* Same value offset by 128 and stored to dst without going through cm. */
281 dst
[7*stride
]= 128 + ((xC4S4
* ip
[0*8] + (IdctAdjustBeforeShift
<<16))>>20);
/* Same value computed once, then added to each of the eight dst rows of
 * this column via cm. */
284 int v
= ((xC4S4
* ip
[0*8] + (IdctAdjustBeforeShift
<<16))>>20);
285 dst
[0*stride
] = cm
[dst
[0*stride
] + v
];
286 dst
[1*stride
] = cm
[dst
[1*stride
] + v
];
287 dst
[2*stride
] = cm
[dst
[2*stride
] + v
];
288 dst
[3*stride
] = cm
[dst
[3*stride
] + v
];
289 dst
[4*stride
] = cm
[dst
[4*stride
] + v
];
290 dst
[5*stride
] = cm
[dst
[5*stride
] + v
];
291 dst
[6*stride
] = cm
[dst
[6*stride
] + v
];
292 dst
[7*stride
] = cm
[dst
[7*stride
] + v
];
297 ip
++; /* next column */
302 void ff_vp3_idct_c(DCTELEM
*block
/* align 16*/){
303 idct(NULL
, 0, block
, 0);
306 void ff_vp3_idct_put_c(uint8_t *dest
/*align 8*/, int line_size
, DCTELEM
*block
/*align 16*/){
307 idct(dest
, line_size
, block
, 1);
310 void ff_vp3_idct_add_c(uint8_t *dest
/*align 8*/, int line_size
, DCTELEM
*block
/*align 16*/){
311 idct(dest
, line_size
, block
, 2);