2 * Copyright (C) 2004 the ffmpeg project
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 * Standard C DSP-oriented functions cribbed from the original VP3
/* Rounding term for the single-shift DC path: it is used as
 * (IdctAdjustBeforeShift << 16) added just before a >> 20, i.e. half of
 * 1 << 20, so that shift rounds to nearest instead of truncating. */
30 #define IdctAdjustBeforeShift 8
/* Multiply a by b and drop the low 16 bits of the product. Both arguments
 * are parenthesized; each is evaluated exactly once. */
39 #define M(a,b) (((a) * (b))>>16)
/*
 * Shared 8x8 inverse DCT body behind the three ff_vp3_idct*_c entry points.
 *
 * dst    - output base pointer; rows are addressed as dst[k*stride].
 *          ff_vp3_idct_c passes NULL here.
 * stride - distance between successive output rows of dst.
 * input  - coefficient block. NOTE(review): the statement that binds `ip`
 *          to this pointer is not present in this listing - presumably
 *          `ip = input`; confirm against the full file.
 * type   - mode selector; the callers pass 0 (ff_vp3_idct_c),
 *          1 (ff_vp3_idct_put_c) or 2 (ff_vp3_idct_add_c).
 *
 * NOTE(review): this listing is missing lines - closing braces, the
 * declarations of `i` and `ip`, the stage that assigns
 * Cd/Dd/Ed/Gd/Add/Bdd/Fd/Hd, and the conditions that select between the
 * output variants further down. The comments below describe only what is
 * visible and mark everything else as an assumption.
 */
41 static av_always_inline
void idct(uint8_t *dst
, int stride
, int16_t *input
, int type
)
/* cm points MAX_NEG_CROP entries into ff_cropTbl so that it can be indexed
 * with the signed sums computed below. NOTE(review): presumably the table
 * clamps to the 0..255 pixel range - confirm where ff_cropTbl is defined. */
44 uint8_t *cm
= ff_cropTbl
+ MAX_NEG_CROP
;
/* Intermediate terms of the 1-D transform; reused by both passes. */
46 int A
, B
, C
, D
, Ad
, Bd
, Cd
, Dd
, E
, F
, G
, H
;
/* Second-stage terms. They are read by the output statements below, but
 * their assignments are not visible in this listing. */
47 int Ed
, Gd
, Add
, Bdd
, Fd
, Hd
;
51 /* Inverse DCT on the rows now */
52 for (i
= 0; i
< 8; i
++) {
53 /* Check for non-zero values */
54 if ( ip
[0] | ip
[1] | ip
[2] | ip
[3] | ip
[4] | ip
[5] | ip
[6] | ip
[7] ) {
/* Odd-indexed inputs: 1 and 7 are paired into A/B, 3 and 5 into C/D.
 * The xC?S? names are defined outside this listing. */
55 A
= M(xC1S7
, ip
[1]) + M(xC7S1
, ip
[7]);
56 B
= M(xC7S1
, ip
[1]) - M(xC1S7
, ip
[7]);
57 C
= M(xC3S5
, ip
[3]) + M(xC5S3
, ip
[5]);
58 D
= M(xC3S5
, ip
[5]) - M(xC5S3
, ip
[3]);
/* Differences of the odd terms, scaled by xC4S4. */
60 Ad
= M(xC4S4
, (A
- C
));
61 Bd
= M(xC4S4
, (B
- D
));
/* Even-indexed inputs: 0 and 4 give E/F, 2 and 6 give G/H. */
66 E
= M(xC4S4
, (ip
[0] + ip
[4]));
67 F
= M(xC4S4
, (ip
[0] - ip
[4]));
69 G
= M(xC2S6
, ip
[2]) + M(xC6S2
, ip
[6]);
70 H
= M(xC6S2
, ip
[2]) - M(xC2S6
, ip
[6]);
/* NOTE(review): the row-pass stores that the next comment announces are
 * not present in this listing. */
81 /* Final sequence of operations over-write original inputs. */
/* Advance to the next row of 8 coefficients. */
95 ip
+= 8; /* next row */
/* Column pass: same structure as the row pass, but elements of a column
 * are 8 entries apart, hence the k*8 indices. */
100 for ( i
= 0; i
< 8; i
++) {
101 /* Check for non-zero values (bitwise or faster than ||) */
/* Unlike the row test, ip[0*8] is not part of this test. */
102 if ( ip
[1 * 8] | ip
[2 * 8] | ip
[3 * 8] |
103 ip
[4 * 8] | ip
[5 * 8] | ip
[6 * 8] | ip
[7 * 8] ) {
105 A
= M(xC1S7
, ip
[1*8]) + M(xC7S1
, ip
[7*8]);
106 B
= M(xC7S1
, ip
[1*8]) - M(xC1S7
, ip
[7*8]);
107 C
= M(xC3S5
, ip
[3*8]) + M(xC5S3
, ip
[5*8]);
108 D
= M(xC3S5
, ip
[5*8]) - M(xC5S3
, ip
[3*8]);
110 Ad
= M(xC4S4
, (A
- C
));
111 Bd
= M(xC4S4
, (B
- D
));
/* The "+ 8" is the only visible difference from the row pass.
 * NOTE(review): presumably it pre-rounds the final ">> 4" below
 * (8 is half of 1 << 4) - confirm against the missing stage. */
116 E
= M(xC4S4
, (ip
[0*8] + ip
[4*8])) + 8;
117 F
= M(xC4S4
, (ip
[0*8] - ip
[4*8])) + 8;
124 G
= M(xC2S6
, ip
[2*8]) + M(xC6S2
, ip
[6*8]);
125 H
= M(xC6S2
, ip
[2*8]) - M(xC2S6
, ip
[6*8]);
136 /* Final sequence of operations over-write original inputs. */
/* Output variant 1: results are stored back into the coefficient column.
 * Each sum/difference pair fills two rows: (0,7), (1,2), (3,4), (5,6).
 * NOTE(review): presumably selected when type == 0 (the caller that
 * passes dst == NULL); the selecting condition is not visible. */
138 ip
[0*8] = (Gd
+ Cd
) >> 4;
139 ip
[7*8] = (Gd
- Cd
) >> 4;
141 ip
[1*8] = (Add
+ Hd
) >> 4;
142 ip
[2*8] = (Add
- Hd
) >> 4;
144 ip
[3*8] = (Ed
+ Dd
) >> 4;
145 ip
[4*8] = (Ed
- Dd
) >> 4;
147 ip
[5*8] = (Fd
+ Bdd
) >> 4;
148 ip
[6*8] = (Fd
- Bdd
) >> 4;
/* Output variant 2: the same eight values are looked up through cm and
 * stored to dst, replacing what was there.
 * NOTE(review): presumably the type == 1 ("put") path - confirm. */
150 dst
[0*stride
] = cm
[(Gd
+ Cd
) >> 4];
151 dst
[7*stride
] = cm
[(Gd
- Cd
) >> 4];
153 dst
[1*stride
] = cm
[(Add
+ Hd
) >> 4];
154 dst
[2*stride
] = cm
[(Add
- Hd
) >> 4];
156 dst
[3*stride
] = cm
[(Ed
+ Dd
) >> 4];
157 dst
[4*stride
] = cm
[(Ed
- Dd
) >> 4];
159 dst
[5*stride
] = cm
[(Fd
+ Bdd
) >> 4];
160 dst
[6*stride
] = cm
[(Fd
- Bdd
) >> 4];
/* Output variant 3: each value is added to the existing dst sample and
 * the sum is looked up through cm.
 * NOTE(review): presumably the type == 2 ("add") path - confirm. */
162 dst
[0*stride
] = cm
[dst
[0*stride
] + ((Gd
+ Cd
) >> 4)];
163 dst
[7*stride
] = cm
[dst
[7*stride
] + ((Gd
- Cd
) >> 4)];
165 dst
[1*stride
] = cm
[dst
[1*stride
] + ((Add
+ Hd
) >> 4)];
166 dst
[2*stride
] = cm
[dst
[2*stride
] + ((Add
- Hd
) >> 4)];
168 dst
[3*stride
] = cm
[dst
[3*stride
] + ((Ed
+ Dd
) >> 4)];
169 dst
[4*stride
] = cm
[dst
[4*stride
] + ((Ed
- Dd
) >> 4)];
171 dst
[5*stride
] = cm
[dst
[5*stride
] + ((Fd
+ Bdd
) >> 4)];
172 dst
[6*stride
] = cm
[dst
[6*stride
] + ((Fd
- Bdd
) >> 4)];
/* From here on only ip[0*8] contributes to the result.
 * NOTE(review): presumably this is the branch taken when the non-zero
 * test above fails; only the row-7 store of the in-place and direct
 * variants survives in this listing.
 * The expression folds "M(xC4S4, ip[0*8]) + 8, then >> 4" into a single
 * multiply, add of (IdctAdjustBeforeShift << 16) and >> 20. */
184 ip
[7*8] = ((xC4S4
* ip
[0*8] + (IdctAdjustBeforeShift
<<16))>>20);
/* Direct-store form: 128 is added and no cm lookup is applied. */
193 dst
[7*stride
]= 128 + ((xC4S4
* ip
[0*8] + (IdctAdjustBeforeShift
<<16))>>20);
/* Additive form: one value v is computed and added to all eight rows of
 * the current column, each sum going through cm. */
196 int v
= ((xC4S4
* ip
[0*8] + (IdctAdjustBeforeShift
<<16))>>20);
197 dst
[0*stride
] = cm
[dst
[0*stride
] + v
];
198 dst
[1*stride
] = cm
[dst
[1*stride
] + v
];
199 dst
[2*stride
] = cm
[dst
[2*stride
] + v
];
200 dst
[3*stride
] = cm
[dst
[3*stride
] + v
];
201 dst
[4*stride
] = cm
[dst
[4*stride
] + v
];
202 dst
[5*stride
] = cm
[dst
[5*stride
] + v
];
203 dst
[6*stride
] = cm
[dst
[6*stride
] + v
];
204 dst
[7*stride
] = cm
[dst
[7*stride
] + v
];
/* NOTE(review): no statement advancing dst to the next column is visible
 * in this listing; verify that the full file steps dst alongside ip. */
209 ip
++; /* next column */
/* Runs idct() on block with no destination (dst NULL, stride 0) and
 * type 0. NOTE(review): presumably this leaves the transformed values in
 * block itself - confirm against the type handling in idct(). */
214 void ff_vp3_idct_c(DCTELEM
*block
/* align 16*/){
215 idct(NULL
, 0, block
, 0);
/* Forwards dest, line_size and block to idct() with type 1.
 * NOTE(review): presumably the "put" variant that overwrites dest with
 * the transformed block - confirm against the type handling in idct(). */
218 void ff_vp3_idct_put_c(uint8_t *dest
/*align 8*/, int line_size
, DCTELEM
*block
/*align 16*/){
219 idct(dest
, line_size
, block
, 1);
/* Forwards dest, line_size and block to idct() with type 2.
 * NOTE(review): presumably the "add" variant that adds the transformed
 * block onto dest - confirm against the type handling in idct(). */
222 void ff_vp3_idct_add_c(uint8_t *dest
/*align 8*/, int line_size
, DCTELEM
*block
/*align 16*/){
223 idct(dest
, line_size
, block
, 2);