Add Russian translation provided by Валерий Крувялис <valkru@mail.ru>
[xiph-mirror.git] / theora-old / lib / idct.c
blob69d074e00846f09ce9173dae1d435ee95ef8d053
1 /********************************************************************
2 * *
3 * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
4 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
5 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
6 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
7 * *
8 * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2003 *
9 * by the Xiph.Org Foundation http://www.xiph.org/ *
10 * *
11 ********************************************************************
13 function: C implementation of the Theora iDCT
14 last mod: $Id$
16 ********************************************************************/
18 #include <string.h>
19 #include "codec_internal.h"
21 #include "quant_lookup.h"
/* Rounding bias added to the column-pass sums before the final ">> 4"
   (8 is one half of the divisor 16). */
#define IdctAdjustBeforeShift   8

/* xCnSm = cos(n*pi/16) = sin(m*pi/16) with m = 8-n, scaled by 65536
   (16 fractional bits), so (xCnSm * v) >> 16 multiplies v by the cosine. */
#define xC1S7   64277
#define xC2S6   60547
#define xC3S5   54491
#define xC4S4   46341
#define xC5S3   36410
#define xC6S2   25080
#define xC7S1   12785
33 /* compute the 16 bit signed 1D inverse DCT - spec version */
35 static void idct_short__c ( ogg_int16_t * InputData, ogg_int16_t * OutputData ) {
36 ogg_int32_t t[8], r;
37 ogg_int16_t *y = InputData;
38 ogg_int16_t *x = OutputData;
40 t[0] = y[0] + y[4];
41 t[0] &= 0xffff;
42 t[0] = (xC4S4 * t[0]) >> 16;
44 t[1] = y[0] - y[4];
45 t[1] &= 0xffff;
46 t[1] = (xC4S4 * t[1]) >> 16;
48 t[2] = ((xC6S2 * t[2]) >> 16) - ((xC2S6 * y[6]) >> 16);
49 t[3] = ((xC2S6 * t[2]) >> 16) + ((xC6S2 * y[6]) >> 16);
50 t[4] = ((xC7S1 * t[1]) >> 16) - ((xC1S7 * y[7]) >> 16);
51 t[5] = ((xC3S5 * t[5]) >> 16) - ((xC5S3 * y[3]) >> 16);
52 t[6] = ((xC5S3 * t[5]) >> 16) + ((xC3S5 * y[3]) >> 16);
53 t[7] = ((xC1S7 * t[1]) >> 16) + ((xC7S1 * y[7]) >> 16);
55 r = t[4] + t[5];
56 t[5] = t[4] - t[5];
57 t[5] &= 0xffff;
58 t[5] = (xC4S4 * (-t[5])) >> 16;
59 t[4] = r;
61 r = t[7] + t[6];
62 t[6] = t[7] - t[6];
63 t[6] &= 0xffff;
64 t[6] = (xC4S4 * t[6]) >> 16;
65 t[7] = r;
67 r = t[0] + t[3];
68 t[3] = t[0] - t[3];
69 t[0] = r;
71 r = t[1] + t[2];
72 t[2] = t[1] - t[2];
73 t[1] = r;
75 r = t[6] + t[5];
76 t[5] = t[6] - t[5];
77 t[6] = r;
79 r = t[0] + t[7];
80 r &= 0xffff;
81 x[0] = r;
83 r = t[1] + t[6];
84 r &= 0xffff;
85 x[1] = r;
87 r = t[2] + t[5];
88 r &= 0xffff;
89 x[2] = r;
91 r = t[3] + t[4];
92 r &= 0xffff;
93 x[3] = r;
95 r = t[3] - t[4];
96 r &= 0xffff;
97 x[4] = r;
99 r = t[2] - t[5];
100 r &= 0xffff;
101 x[5] = r;
103 r = t[1] - t[6];
104 r &= 0xffff;
105 x[6] = r;
107 r = t[0] - t[7];
108 r &= 0xffff;
109 x[7] = r;
114 static void dequant_slow( ogg_int16_t * dequant_coeffs,
115 ogg_int16_t * quantized_list,
116 ogg_int32_t * DCT_block) {
117 int i;
118 for(i=0;i<64;i++)
119 DCT_block[dezigzag_index[i]] = quantized_list[i] * dequant_coeffs[i];
124 void IDctSlow__c( Q_LIST_ENTRY * InputData,
125 ogg_int16_t *QuantMatrix,
126 ogg_int16_t * OutputData ) {
127 ogg_int32_t IntermediateData[64];
128 ogg_int32_t * ip = IntermediateData;
129 ogg_int16_t * op = OutputData;
131 ogg_int32_t _A, _B, _C, _D, _Ad, _Bd, _Cd, _Dd, _E, _F, _G, _H;
132 ogg_int32_t _Ed, _Gd, _Add, _Bdd, _Fd, _Hd;
133 ogg_int32_t t1, t2;
135 int loop;
137 dequant_slow( QuantMatrix, InputData, IntermediateData);
139 /* Inverse DCT on the rows now */
140 for ( loop = 0; loop < 8; loop++){
141 /* Check for non-zero values */
142 if ( ip[0] | ip[1] | ip[2] | ip[3] | ip[4] | ip[5] | ip[6] | ip[7] ) {
143 t1 = (xC1S7 * ip[1]);
144 t2 = (xC7S1 * ip[7]);
145 t1 >>= 16;
146 t2 >>= 16;
147 _A = t1 + t2;
149 t1 = (xC7S1 * ip[1]);
150 t2 = (xC1S7 * ip[7]);
151 t1 >>= 16;
152 t2 >>= 16;
153 _B = t1 - t2;
155 t1 = (xC3S5 * ip[3]);
156 t2 = (xC5S3 * ip[5]);
157 t1 >>= 16;
158 t2 >>= 16;
159 _C = t1 + t2;
161 t1 = (xC3S5 * ip[5]);
162 t2 = (xC5S3 * ip[3]);
163 t1 >>= 16;
164 t2 >>= 16;
165 _D = t1 - t2;
167 t1 = (xC4S4 * (ogg_int16_t)(_A - _C));
168 t1 >>= 16;
169 _Ad = t1;
171 t1 = (xC4S4 * (ogg_int16_t)(_B - _D));
172 t1 >>= 16;
173 _Bd = t1;
176 _Cd = _A + _C;
177 _Dd = _B + _D;
179 t1 = (xC4S4 * (ogg_int16_t)(ip[0] + ip[4]));
180 t1 >>= 16;
181 _E = t1;
183 t1 = (xC4S4 * (ogg_int16_t)(ip[0] - ip[4]));
184 t1 >>= 16;
185 _F = t1;
187 t1 = (xC2S6 * ip[2]);
188 t2 = (xC6S2 * ip[6]);
189 t1 >>= 16;
190 t2 >>= 16;
191 _G = t1 + t2;
193 t1 = (xC6S2 * ip[2]);
194 t2 = (xC2S6 * ip[6]);
195 t1 >>= 16;
196 t2 >>= 16;
197 _H = t1 - t2;
200 _Ed = _E - _G;
201 _Gd = _E + _G;
203 _Add = _F + _Ad;
204 _Bdd = _Bd - _H;
206 _Fd = _F - _Ad;
207 _Hd = _Bd + _H;
209 /* Final sequence of operations over-write original inputs. */
210 ip[0] = (ogg_int16_t)((_Gd + _Cd ) >> 0);
211 ip[7] = (ogg_int16_t)((_Gd - _Cd ) >> 0);
213 ip[1] = (ogg_int16_t)((_Add + _Hd ) >> 0);
214 ip[2] = (ogg_int16_t)((_Add - _Hd ) >> 0);
216 ip[3] = (ogg_int16_t)((_Ed + _Dd ) >> 0);
217 ip[4] = (ogg_int16_t)((_Ed - _Dd ) >> 0);
219 ip[5] = (ogg_int16_t)((_Fd + _Bdd ) >> 0);
220 ip[6] = (ogg_int16_t)((_Fd - _Bdd ) >> 0);
224 ip += 8; /* next row */
227 ip = IntermediateData;
229 for ( loop = 0; loop < 8; loop++){
230 /* Check for non-zero values (bitwise or faster than ||) */
231 if ( ip[0 * 8] | ip[1 * 8] | ip[2 * 8] | ip[3 * 8] |
232 ip[4 * 8] | ip[5 * 8] | ip[6 * 8] | ip[7 * 8] ) {
234 t1 = (xC1S7 * ip[1*8]);
235 t2 = (xC7S1 * ip[7*8]);
236 t1 >>= 16;
237 t2 >>= 16;
238 _A = t1 + t2;
240 t1 = (xC7S1 * ip[1*8]);
241 t2 = (xC1S7 * ip[7*8]);
242 t1 >>= 16;
243 t2 >>= 16;
244 _B = t1 - t2;
246 t1 = (xC3S5 * ip[3*8]);
247 t2 = (xC5S3 * ip[5*8]);
248 t1 >>= 16;
249 t2 >>= 16;
250 _C = t1 + t2;
252 t1 = (xC3S5 * ip[5*8]);
253 t2 = (xC5S3 * ip[3*8]);
254 t1 >>= 16;
255 t2 >>= 16;
256 _D = t1 - t2;
258 t1 = (xC4S4 * (ogg_int16_t)(_A - _C));
259 t1 >>= 16;
260 _Ad = t1;
262 t1 = (xC4S4 * (ogg_int16_t)(_B - _D));
263 t1 >>= 16;
264 _Bd = t1;
267 _Cd = _A + _C;
268 _Dd = _B + _D;
270 t1 = (xC4S4 * (ogg_int16_t)(ip[0*8] + ip[4*8]));
271 t1 >>= 16;
272 _E = t1;
274 t1 = (xC4S4 * (ogg_int16_t)(ip[0*8] - ip[4*8]));
275 t1 >>= 16;
276 _F = t1;
278 t1 = (xC2S6 * ip[2*8]);
279 t2 = (xC6S2 * ip[6*8]);
280 t1 >>= 16;
281 t2 >>= 16;
282 _G = t1 + t2;
284 t1 = (xC6S2 * ip[2*8]);
285 t2 = (xC2S6 * ip[6*8]);
286 t1 >>= 16;
287 t2 >>= 16;
288 _H = t1 - t2;
290 _Ed = _E - _G;
291 _Gd = _E + _G;
293 _Add = _F + _Ad;
294 _Bdd = _Bd - _H;
296 _Fd = _F - _Ad;
297 _Hd = _Bd + _H;
299 _Gd += IdctAdjustBeforeShift;
300 _Add += IdctAdjustBeforeShift;
301 _Ed += IdctAdjustBeforeShift;
302 _Fd += IdctAdjustBeforeShift;
304 /* Final sequence of operations over-write original inputs. */
305 op[0*8] = (ogg_int16_t)((_Gd + _Cd ) >> 4);
306 op[7*8] = (ogg_int16_t)((_Gd - _Cd ) >> 4);
308 op[1*8] = (ogg_int16_t)((_Add + _Hd ) >> 4);
309 op[2*8] = (ogg_int16_t)((_Add - _Hd ) >> 4);
311 op[3*8] = (ogg_int16_t)((_Ed + _Dd ) >> 4);
312 op[4*8] = (ogg_int16_t)((_Ed - _Dd ) >> 4);
314 op[5*8] = (ogg_int16_t)((_Fd + _Bdd ) >> 4);
315 op[6*8] = (ogg_int16_t)((_Fd - _Bdd ) >> 4);
316 }else{
317 op[0*8] = 0;
318 op[7*8] = 0;
319 op[1*8] = 0;
320 op[2*8] = 0;
321 op[3*8] = 0;
322 op[4*8] = 0;
323 op[5*8] = 0;
324 op[6*8] = 0;
327 ip++; /* next column */
328 op++;
332 /************************
333 x x x x 0 0 0 0
334 x x x 0 0 0 0 0
335 x x 0 0 0 0 0 0
336 x 0 0 0 0 0 0 0
337 0 0 0 0 0 0 0 0
338 0 0 0 0 0 0 0 0
339 0 0 0 0 0 0 0 0
340 0 0 0 0 0 0 0 0
341 *************************/
343 static void dequant_slow10( ogg_int16_t * dequant_coeffs,
344 ogg_int16_t * quantized_list,
345 ogg_int32_t * DCT_block){
346 int i;
347 memset(DCT_block,0, 128);
348 for(i=0;i<10;i++)
349 DCT_block[dezigzag_index[i]] = quantized_list[i] * dequant_coeffs[i];
353 void IDct10__c( Q_LIST_ENTRY * InputData,
354 ogg_int16_t *QuantMatrix,
355 ogg_int16_t * OutputData ){
356 ogg_int32_t IntermediateData[64];
357 ogg_int32_t * ip = IntermediateData;
358 ogg_int16_t * op = OutputData;
360 ogg_int32_t _A, _B, _C, _D, _Ad, _Bd, _Cd, _Dd, _E, _F, _G, _H;
361 ogg_int32_t _Ed, _Gd, _Add, _Bdd, _Fd, _Hd;
362 ogg_int32_t t1, t2;
364 int loop;
366 dequant_slow10( QuantMatrix, InputData, IntermediateData);
368 /* Inverse DCT on the rows now */
369 for ( loop = 0; loop < 4; loop++){
370 /* Check for non-zero values */
371 if ( ip[0] | ip[1] | ip[2] | ip[3] ){
372 t1 = (xC1S7 * ip[1]);
373 t1 >>= 16;
374 _A = t1;
376 t1 = (xC7S1 * ip[1]);
377 t1 >>= 16;
378 _B = t1 ;
380 t1 = (xC3S5 * ip[3]);
381 t1 >>= 16;
382 _C = t1;
384 t2 = (xC5S3 * ip[3]);
385 t2 >>= 16;
386 _D = -t2;
389 t1 = (xC4S4 * (ogg_int16_t)(_A - _C));
390 t1 >>= 16;
391 _Ad = t1;
393 t1 = (xC4S4 * (ogg_int16_t)(_B - _D));
394 t1 >>= 16;
395 _Bd = t1;
398 _Cd = _A + _C;
399 _Dd = _B + _D;
401 t1 = (xC4S4 * ip[0] );
402 t1 >>= 16;
403 _E = t1;
405 _F = t1;
407 t1 = (xC2S6 * ip[2]);
408 t1 >>= 16;
409 _G = t1;
411 t1 = (xC6S2 * ip[2]);
412 t1 >>= 16;
413 _H = t1 ;
416 _Ed = _E - _G;
417 _Gd = _E + _G;
419 _Add = _F + _Ad;
420 _Bdd = _Bd - _H;
422 _Fd = _F - _Ad;
423 _Hd = _Bd + _H;
425 /* Final sequence of operations over-write original inputs. */
426 ip[0] = (ogg_int16_t)((_Gd + _Cd ) >> 0);
427 ip[7] = (ogg_int16_t)((_Gd - _Cd ) >> 0);
429 ip[1] = (ogg_int16_t)((_Add + _Hd ) >> 0);
430 ip[2] = (ogg_int16_t)((_Add - _Hd ) >> 0);
432 ip[3] = (ogg_int16_t)((_Ed + _Dd ) >> 0);
433 ip[4] = (ogg_int16_t)((_Ed - _Dd ) >> 0);
435 ip[5] = (ogg_int16_t)((_Fd + _Bdd ) >> 0);
436 ip[6] = (ogg_int16_t)((_Fd - _Bdd ) >> 0);
440 ip += 8; /* next row */
443 ip = IntermediateData;
445 for ( loop = 0; loop < 8; loop++) {
446 /* Check for non-zero values (bitwise or faster than ||) */
447 if ( ip[0 * 8] | ip[1 * 8] | ip[2 * 8] | ip[3 * 8] ) {
449 t1 = (xC1S7 * ip[1*8]);
450 t1 >>= 16;
451 _A = t1 ;
453 t1 = (xC7S1 * ip[1*8]);
454 t1 >>= 16;
455 _B = t1 ;
457 t1 = (xC3S5 * ip[3*8]);
458 t1 >>= 16;
459 _C = t1 ;
461 t2 = (xC5S3 * ip[3*8]);
462 t2 >>= 16;
463 _D = - t2;
466 t1 = (xC4S4 * (ogg_int16_t)(_A - _C));
467 t1 >>= 16;
468 _Ad = t1;
470 t1 = (xC4S4 * (ogg_int16_t)(_B - _D));
471 t1 >>= 16;
472 _Bd = t1;
475 _Cd = _A + _C;
476 _Dd = _B + _D;
478 t1 = (xC4S4 * ip[0*8]);
479 t1 >>= 16;
480 _E = t1;
481 _F = t1;
483 t1 = (xC2S6 * ip[2*8]);
484 t1 >>= 16;
485 _G = t1;
487 t1 = (xC6S2 * ip[2*8]);
488 t1 >>= 16;
489 _H = t1;
492 _Ed = _E - _G;
493 _Gd = _E + _G;
495 _Add = _F + _Ad;
496 _Bdd = _Bd - _H;
498 _Fd = _F - _Ad;
499 _Hd = _Bd + _H;
501 _Gd += IdctAdjustBeforeShift;
502 _Add += IdctAdjustBeforeShift;
503 _Ed += IdctAdjustBeforeShift;
504 _Fd += IdctAdjustBeforeShift;
506 /* Final sequence of operations over-write original inputs. */
507 op[0*8] = (ogg_int16_t)((_Gd + _Cd ) >> 4);
508 op[7*8] = (ogg_int16_t)((_Gd - _Cd ) >> 4);
510 op[1*8] = (ogg_int16_t)((_Add + _Hd ) >> 4);
511 op[2*8] = (ogg_int16_t)((_Add - _Hd ) >> 4);
513 op[3*8] = (ogg_int16_t)((_Ed + _Dd ) >> 4);
514 op[4*8] = (ogg_int16_t)((_Ed - _Dd ) >> 4);
516 op[5*8] = (ogg_int16_t)((_Fd + _Bdd ) >> 4);
517 op[6*8] = (ogg_int16_t)((_Fd - _Bdd ) >> 4);
518 }else{
519 op[0*8] = 0;
520 op[7*8] = 0;
521 op[1*8] = 0;
522 op[2*8] = 0;
523 op[3*8] = 0;
524 op[4*8] = 0;
525 op[5*8] = 0;
526 op[6*8] = 0;
529 ip++; /* next column */
530 op++;
534 /***************************
535 x 0 0 0 0 0 0 0
536 0 0 0 0 0 0 0 0
537 0 0 0 0 0 0 0 0
538 0 0 0 0 0 0 0 0
539 0 0 0 0 0 0 0 0
540 0 0 0 0 0 0 0 0
541 0 0 0 0 0 0 0 0
542 0 0 0 0 0 0 0 0
543 **************************/
545 void IDct1( Q_LIST_ENTRY * InputData,
546 ogg_int16_t *QuantMatrix,
547 ogg_int16_t * OutputData ){
548 int loop;
550 ogg_int16_t OutD;
552 OutD=(ogg_int16_t) ((ogg_int32_t)(InputData[0]*QuantMatrix[0]+15)>>5);
554 for(loop=0;loop<64;loop++)
555 OutputData[loop]=OutD;
559 void dsp_idct_init (DspFunctions *funcs, ogg_uint32_t cpu_flags)
561 funcs->IDctSlow = IDctSlow__c;
562 funcs->IDct10 = IDct10__c;
563 funcs->IDct3 = IDct10__c;
564 #if defined(USE_ASM)
565 if (cpu_flags & CPU_X86_MMX) {
566 dsp_mmx_idct_init(funcs);
568 #endif