2 * Floating point AAN DCT
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 * this implementation is based upon the IJG integer AAN DCT (see jfdctfst.c)
25 * Floating point AAN DCT
26 * @author Michael Niedermayer <michaelni@gmx.at>
34 # define SCALE(x) postscale[x]
39 //numbers generated by simple c code (not as accurate as they could be)
42 printf("#define B%d %1.20llf\n", i, (long double)1.0/(cosl(i*acosl(-1.0)/(long double)16.0)*sqrtl(2)));
45 #define B0 1.00000000000000000000
46 #define B1 0.72095982200694791383 // (cos(pi*1/16)sqrt(2))^-1
47 #define B2 0.76536686473017954350 // (cos(pi*2/16)sqrt(2))^-1
48 #define B3 0.85043009476725644878 // (cos(pi*3/16)sqrt(2))^-1
49 #define B4 1.00000000000000000000 // (cos(pi*4/16)sqrt(2))^-1
50 #define B5 1.27275858057283393842 // (cos(pi*5/16)sqrt(2))^-1
51 #define B6 1.84775906502257351242 // (cos(pi*6/16)sqrt(2))^-1
52 #define B7 3.62450978541155137218 // (cos(pi*7/16)sqrt(2))^-1
55 #define A1 0.70710678118654752438 // cos(pi*4/16)
56 #define A2 0.54119610014619698435 // cos(pi*6/16)sqrt(2)
57 #define A5 0.38268343236508977170 // cos(pi*6/16)
58 #define A4 1.30656296487637652774 // cos(pi*2/16)sqrt(2)
60 static FLOAT postscale
[64]={
61 B0
*B0
, B0
*B1
, B0
*B2
, B0
*B3
, B0
*B4
, B0
*B5
, B0
*B6
, B0
*B7
,
62 B1
*B0
, B1
*B1
, B1
*B2
, B1
*B3
, B1
*B4
, B1
*B5
, B1
*B6
, B1
*B7
,
63 B2
*B0
, B2
*B1
, B2
*B2
, B2
*B3
, B2
*B4
, B2
*B5
, B2
*B6
, B2
*B7
,
64 B3
*B0
, B3
*B1
, B3
*B2
, B3
*B3
, B3
*B4
, B3
*B5
, B3
*B6
, B3
*B7
,
65 B4
*B0
, B4
*B1
, B4
*B2
, B4
*B3
, B4
*B4
, B4
*B5
, B4
*B6
, B4
*B7
,
66 B5
*B0
, B5
*B1
, B5
*B2
, B5
*B3
, B5
*B4
, B5
*B5
, B5
*B6
, B5
*B7
,
67 B6
*B0
, B6
*B1
, B6
*B2
, B6
*B3
, B6
*B4
, B6
*B5
, B6
*B6
, B6
*B7
,
68 B7
*B0
, B7
*B1
, B7
*B2
, B7
*B3
, B7
*B4
, B7
*B5
, B7
*B6
, B7
*B7
,
71 static always_inline
void row_fdct(FLOAT temp
[64], DCTELEM
* data
)
73 FLOAT tmp0
, tmp1
, tmp2
, tmp3
, tmp4
, tmp5
, tmp6
, tmp7
;
74 FLOAT tmp10
, tmp11
, tmp12
, tmp13
;
75 FLOAT z1
, z2
, z3
, z4
, z5
, z11
, z13
;
78 for (i
=0; i
<8*8; i
+=8) {
79 tmp0
= data
[0 + i
] + data
[7 + i
];
80 tmp7
= data
[0 + i
] - data
[7 + i
];
81 tmp1
= data
[1 + i
] + data
[6 + i
];
82 tmp6
= data
[1 + i
] - data
[6 + i
];
83 tmp2
= data
[2 + i
] + data
[5 + i
];
84 tmp5
= data
[2 + i
] - data
[5 + i
];
85 tmp3
= data
[3 + i
] + data
[4 + i
];
86 tmp4
= data
[3 + i
] - data
[4 + i
];
93 temp
[0 + i
]= tmp10
+ tmp11
;
94 temp
[4 + i
]= tmp10
- tmp11
;
96 z1
= (tmp12
+ tmp13
)*A1
;
97 temp
[2 + i
]= tmp13
+ z1
;
98 temp
[6 + i
]= tmp13
- z1
;
104 z5
= (tmp10
- tmp12
) * A5
;
112 temp
[5 + i
]= z13
+ z2
;
113 temp
[3 + i
]= z13
- z2
;
114 temp
[1 + i
]= z11
+ z4
;
115 temp
[7 + i
]= z11
- z4
;
119 void ff_faandct(DCTELEM
* data
)
121 FLOAT tmp0
, tmp1
, tmp2
, tmp3
, tmp4
, tmp5
, tmp6
, tmp7
;
122 FLOAT tmp10
, tmp11
, tmp12
, tmp13
;
123 FLOAT z1
, z2
, z3
, z4
, z5
, z11
, z13
;
129 row_fdct(temp
, data
);
131 for (i
=0; i
<8; i
++) {
132 tmp0
= temp
[8*0 + i
] + temp
[8*7 + i
];
133 tmp7
= temp
[8*0 + i
] - temp
[8*7 + i
];
134 tmp1
= temp
[8*1 + i
] + temp
[8*6 + i
];
135 tmp6
= temp
[8*1 + i
] - temp
[8*6 + i
];
136 tmp2
= temp
[8*2 + i
] + temp
[8*5 + i
];
137 tmp5
= temp
[8*2 + i
] - temp
[8*5 + i
];
138 tmp3
= temp
[8*3 + i
] + temp
[8*4 + i
];
139 tmp4
= temp
[8*3 + i
] - temp
[8*4 + i
];
146 data
[8*0 + i
]= lrintf(SCALE(8*0 + i
) * (tmp10
+ tmp11
));
147 data
[8*4 + i
]= lrintf(SCALE(8*4 + i
) * (tmp10
- tmp11
));
149 z1
= (tmp12
+ tmp13
)* A1
;
150 data
[8*2 + i
]= lrintf(SCALE(8*2 + i
) * (tmp13
+ z1
));
151 data
[8*6 + i
]= lrintf(SCALE(8*6 + i
) * (tmp13
- z1
));
157 z5
= (tmp10
- tmp12
) * A5
;
165 data
[8*5 + i
]= lrintf(SCALE(8*5 + i
) * (z13
+ z2
));
166 data
[8*3 + i
]= lrintf(SCALE(8*3 + i
) * (z13
- z2
));
167 data
[8*1 + i
]= lrintf(SCALE(8*1 + i
) * (z11
+ z4
));
168 data
[8*7 + i
]= lrintf(SCALE(8*7 + i
) * (z11
- z4
));
172 void ff_faandct248(DCTELEM
* data
)
174 FLOAT tmp0
, tmp1
, tmp2
, tmp3
, tmp4
, tmp5
, tmp6
, tmp7
;
175 FLOAT tmp10
, tmp11
, tmp12
, tmp13
;
182 row_fdct(temp
, data
);
184 for (i
=0; i
<8; i
++) {
185 tmp0
= temp
[8*0 + i
] + temp
[8*1 + i
];
186 tmp1
= temp
[8*2 + i
] + temp
[8*3 + i
];
187 tmp2
= temp
[8*4 + i
] + temp
[8*5 + i
];
188 tmp3
= temp
[8*6 + i
] + temp
[8*7 + i
];
189 tmp4
= temp
[8*0 + i
] - temp
[8*1 + i
];
190 tmp5
= temp
[8*2 + i
] - temp
[8*3 + i
];
191 tmp6
= temp
[8*4 + i
] - temp
[8*5 + i
];
192 tmp7
= temp
[8*6 + i
] - temp
[8*7 + i
];
199 data
[8*0 + i
] = lrintf(SCALE(8*0 + i
) * (tmp10
+ tmp11
));
200 data
[8*4 + i
] = lrintf(SCALE(8*4 + i
) * (tmp10
- tmp11
));
202 z1
= (tmp12
+ tmp13
)* A1
;
203 data
[8*2 + i
] = lrintf(SCALE(8*2 + i
) * (tmp13
+ z1
));
204 data
[8*6 + i
] = lrintf(SCALE(8*6 + i
) * (tmp13
- z1
));
211 data
[8*1 + i
] = lrintf(SCALE(8*0 + i
) * (tmp10
+ tmp11
));
212 data
[8*5 + i
] = lrintf(SCALE(8*4 + i
) * (tmp10
- tmp11
));
214 z1
= (tmp12
+ tmp13
)* A1
;
215 data
[8*3 + i
] = lrintf(SCALE(8*2 + i
) * (tmp13
+ z1
));
216 data
[8*7 + i
] = lrintf(SCALE(8*6 + i
) * (tmp13
- z1
));