2 * Floating point AAN DCT
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * this implementation is based upon the IJG integer AAN DCT (see jfdctfst.c)
7 * This file is part of FFmpeg.
9 * FFmpeg is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * FFmpeg is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with FFmpeg; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 * The AAN DCT in this file except ff_faandct248() can also be used under the
24 * new (3 clause) BSD license.
30 * Floating point AAN DCT
31 * @author Michael Niedermayer <michaelni@gmx.at>
/* Post-scale factor for coefficient index x, read from the postscale[] table. */
#define SCALE(x) postscale[(x)]
// Numbers generated by simple C code (not as accurate as they could be).
// For each index i the value was printed with:
//
//   printf("#define B%d %1.20Lf\n", i, (long double)1.0/(cosl(i*acosl(-1.0)/(long double)16.0)*sqrtl(2)));
//
// (%Lf is the printf conversion for a long double argument.)
//
// NOTE(review): B0 is defined as 1.0, whereas the expression above evaluates
// to 1/sqrt(2) for i = 0 -- presumably intentional; confirm.
#define B0 1.00000000000000000000
#define B1 0.72095982200694791383 // (cos(pi*1/16)sqrt(2))^-1
#define B2 0.76536686473017954350 // (cos(pi*2/16)sqrt(2))^-1
#define B3 0.85043009476725644878 // (cos(pi*3/16)sqrt(2))^-1
#define B4 1.00000000000000000000 // (cos(pi*4/16)sqrt(2))^-1
#define B5 1.27275858057283393842 // (cos(pi*5/16)sqrt(2))^-1
#define B6 1.84775906502257351242 // (cos(pi*6/16)sqrt(2))^-1
#define B7 3.62450978541155137218 // (cos(pi*7/16)sqrt(2))^-1

// Multipliers used inside the butterfly network.
#define A1 0.70710678118654752438 // cos(pi*4/16)
#define A2 0.54119610014619698435 // cos(pi*6/16)sqrt(2)
#define A5 0.38268343236508977170 // cos(pi*6/16)
#define A4 1.30656296487637652774 // cos(pi*2/16)sqrt(2)
65 static FLOAT postscale
[64]={
66 B0
*B0
, B0
*B1
, B0
*B2
, B0
*B3
, B0
*B4
, B0
*B5
, B0
*B6
, B0
*B7
,
67 B1
*B0
, B1
*B1
, B1
*B2
, B1
*B3
, B1
*B4
, B1
*B5
, B1
*B6
, B1
*B7
,
68 B2
*B0
, B2
*B1
, B2
*B2
, B2
*B3
, B2
*B4
, B2
*B5
, B2
*B6
, B2
*B7
,
69 B3
*B0
, B3
*B1
, B3
*B2
, B3
*B3
, B3
*B4
, B3
*B5
, B3
*B6
, B3
*B7
,
70 B4
*B0
, B4
*B1
, B4
*B2
, B4
*B3
, B4
*B4
, B4
*B5
, B4
*B6
, B4
*B7
,
71 B5
*B0
, B5
*B1
, B5
*B2
, B5
*B3
, B5
*B4
, B5
*B5
, B5
*B6
, B5
*B7
,
72 B6
*B0
, B6
*B1
, B6
*B2
, B6
*B3
, B6
*B4
, B6
*B5
, B6
*B6
, B6
*B7
,
73 B7
*B0
, B7
*B1
, B7
*B2
, B7
*B3
, B7
*B4
, B7
*B5
, B7
*B6
, B7
*B7
,
76 static av_always_inline
void row_fdct(FLOAT temp
[64], DCTELEM
* data
)
78 FLOAT tmp0
, tmp1
, tmp2
, tmp3
, tmp4
, tmp5
, tmp6
, tmp7
;
79 FLOAT tmp10
, tmp11
, tmp12
, tmp13
;
80 FLOAT z2
, z4
, z11
, z13
;
84 for (i
=0; i
<8*8; i
+=8) {
85 tmp0
= data
[0 + i
] + data
[7 + i
];
86 tmp7
= data
[0 + i
] - data
[7 + i
];
87 tmp1
= data
[1 + i
] + data
[6 + i
];
88 tmp6
= data
[1 + i
] - data
[6 + i
];
89 tmp2
= data
[2 + i
] + data
[5 + i
];
90 tmp5
= data
[2 + i
] - data
[5 + i
];
91 tmp3
= data
[3 + i
] + data
[4 + i
];
92 tmp4
= data
[3 + i
] - data
[4 + i
];
99 temp
[0 + i
]= tmp10
+ tmp11
;
100 temp
[4 + i
]= tmp10
- tmp11
;
104 temp
[2 + i
]= tmp13
+ tmp12
;
105 temp
[6 + i
]= tmp13
- tmp12
;
112 z5
= (tmp4
- tmp6
) * A5
;
116 z2
= tmp4
*(A2
+A5
) - tmp6
*A5
;
117 z4
= tmp6
*(A4
-A5
) + tmp4
*A5
;
124 temp
[5 + i
]= z13
+ z2
;
125 temp
[3 + i
]= z13
- z2
;
126 temp
[1 + i
]= z11
+ z4
;
127 temp
[7 + i
]= z11
- z4
;
131 void ff_faandct(DCTELEM
* data
)
133 FLOAT tmp0
, tmp1
, tmp2
, tmp3
, tmp4
, tmp5
, tmp6
, tmp7
;
134 FLOAT tmp10
, tmp11
, tmp12
, tmp13
;
135 FLOAT z2
, z4
, z11
, z13
;
142 row_fdct(temp
, data
);
144 for (i
=0; i
<8; i
++) {
145 tmp0
= temp
[8*0 + i
] + temp
[8*7 + i
];
146 tmp7
= temp
[8*0 + i
] - temp
[8*7 + i
];
147 tmp1
= temp
[8*1 + i
] + temp
[8*6 + i
];
148 tmp6
= temp
[8*1 + i
] - temp
[8*6 + i
];
149 tmp2
= temp
[8*2 + i
] + temp
[8*5 + i
];
150 tmp5
= temp
[8*2 + i
] - temp
[8*5 + i
];
151 tmp3
= temp
[8*3 + i
] + temp
[8*4 + i
];
152 tmp4
= temp
[8*3 + i
] - temp
[8*4 + i
];
159 data
[8*0 + i
]= lrintf(SCALE(8*0 + i
) * (tmp10
+ tmp11
));
160 data
[8*4 + i
]= lrintf(SCALE(8*4 + i
) * (tmp10
- tmp11
));
164 data
[8*2 + i
]= lrintf(SCALE(8*2 + i
) * (tmp13
+ tmp12
));
165 data
[8*6 + i
]= lrintf(SCALE(8*6 + i
) * (tmp13
- tmp12
));
172 z5
= (tmp4
- tmp6
) * A5
;
176 z2
= tmp4
*(A2
+A5
) - tmp6
*A5
;
177 z4
= tmp6
*(A4
-A5
) + tmp4
*A5
;
184 data
[8*5 + i
]= lrintf(SCALE(8*5 + i
) * (z13
+ z2
));
185 data
[8*3 + i
]= lrintf(SCALE(8*3 + i
) * (z13
- z2
));
186 data
[8*1 + i
]= lrintf(SCALE(8*1 + i
) * (z11
+ z4
));
187 data
[8*7 + i
]= lrintf(SCALE(8*7 + i
) * (z11
- z4
));
191 void ff_faandct248(DCTELEM
* data
)
193 FLOAT tmp0
, tmp1
, tmp2
, tmp3
, tmp4
, tmp5
, tmp6
, tmp7
;
194 FLOAT tmp10
, tmp11
, tmp12
, tmp13
;
200 row_fdct(temp
, data
);
202 for (i
=0; i
<8; i
++) {
203 tmp0
= temp
[8*0 + i
] + temp
[8*1 + i
];
204 tmp1
= temp
[8*2 + i
] + temp
[8*3 + i
];
205 tmp2
= temp
[8*4 + i
] + temp
[8*5 + i
];
206 tmp3
= temp
[8*6 + i
] + temp
[8*7 + i
];
207 tmp4
= temp
[8*0 + i
] - temp
[8*1 + i
];
208 tmp5
= temp
[8*2 + i
] - temp
[8*3 + i
];
209 tmp6
= temp
[8*4 + i
] - temp
[8*5 + i
];
210 tmp7
= temp
[8*6 + i
] - temp
[8*7 + i
];
217 data
[8*0 + i
] = lrintf(SCALE(8*0 + i
) * (tmp10
+ tmp11
));
218 data
[8*4 + i
] = lrintf(SCALE(8*4 + i
) * (tmp10
- tmp11
));
222 data
[8*2 + i
] = lrintf(SCALE(8*2 + i
) * (tmp13
+ tmp12
));
223 data
[8*6 + i
] = lrintf(SCALE(8*6 + i
) * (tmp13
- tmp12
));
230 data
[8*1 + i
] = lrintf(SCALE(8*0 + i
) * (tmp10
+ tmp11
));
231 data
[8*5 + i
] = lrintf(SCALE(8*4 + i
) * (tmp10
- tmp11
));
235 data
[8*3 + i
] = lrintf(SCALE(8*2 + i
) * (tmp13
+ tmp12
));
236 data
[8*7 + i
] = lrintf(SCALE(8*6 + i
) * (tmp13
- tmp12
));