2 ** FAAD2 - Freeware Advanced Audio (AAC) Decoder including SBR decoding
3 ** Copyright (C) 2003-2004 M. Bakker, Ahead Software AG, http://www.nero.com
5 ** This program is free software; you can redistribute it and/or modify
6 ** it under the terms of the GNU General Public License as published by
7 ** the Free Software Foundation; either version 2 of the License, or
8 ** (at your option) any later version.
10 ** This program is distributed in the hope that it will be useful,
11 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
12 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 ** GNU General Public License for more details.
15 ** You should have received a copy of the GNU General Public License
16 ** along with this program; if not, write to the Free Software
17 ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
19 ** Any non-GPL usage of this software or parts of this software is strictly
22 ** Commercial non-GPL licensing of this software is possible.
23 ** For more info contact Ahead Software through Mpeg4AAClicense@nero.com.
46 /*Windowing functions borrowed from libwmai*/
// vector_fmul_add_add, inline-assembly variant written with ARM mnemonics
// (ldmia, smull, stmia). For each pair of elements the visible instructions
// form
//     dst[k] = src2[k] + ((high word of src0[k] * src1[k]) << 1)
// and post-increment all four pointers while len drops by 2.
// NOTE(review): this listing has gaps (the embedded numbering skips 51, 53,
// 54, 58, 66 and 68), so the asm statement opener, the loop label and the
// loop branch are not visible here; the looping is assumed, not shown.
// NOTE(review): len is decremented by 2 per pair, so it presumably has to be
// a positive even count - confirm against the callers.
50 void vector_fmul_add_add(real_t
*dst
, const real_t
*src0
, const real_t
*src1
, const real_t
*src2
, int len
)
52 /* Block sizes are always power of two */
// Fetch two words each from src0 (operand d) and src1 (operand w).
55 "ldmia %[d]!, {r0, r1};"
56 "ldmia %[w]!, {r4, r5};"
57 /* consume the first data and window value so we can use those
59 "smull r8, r9, r0, r4;"
60 "ldmia %[src2]!, {r0, r4};"
61 "add r0, r0, r9, lsl #1;" /* *dst=*dst+(r9<<1)*/
// Second element of the pair: r1 * r5, high word in r9, added (shifted left
// by one) to the second src2 word that is still held in r4.
62 "smull r8, r9, r1, r5;"
63 "add r1, r4, r9, lsl #1;"
// Write both results, then count the pair; subs also updates the flags.
64 "stmia %[dst]!, {r0, r1};"
65 "subs %[n], %[n], #2;"
// All five operands are read-write (+r): the pointers are advanced and len
// is decremented in place.
67 : [d
] "+r" (src0
), [w
] "+r" (src1
), [src2
] "+r" (src2
), [dst
] "+r" (dst
), [n
] "+r" (len
)
// Scratch registers used above, plus memory (stores through dst) and cc
// (flags written by subs).
69 : "r0", "r1", "r4", "r5", "r8", "r9", "memory", "cc");
// vector_fmul_reverse, inline-assembly variant written with ARM mnemonics.
// src0 is read forwards and src1 backwards: within each pair the first src0
// word is multiplied with the higher-addressed src1 word and the second src0
// word with the lower-addressed one.
// NOTE(review): the listing has gaps here (numbering skips 73, 74, 76, 78,
// 82, 84, 87 and 89). The end of the parameter list, the asm opener, the loop
// label and branch, and whatever moves each smull result into r0 and r1
// before the stmia are not visible.
72 void vector_fmul_reverse(real_t
*dst
, const real_t
*src0
, const real_t
*src1
,
75 /* Block sizes are always power of two */
// Move s1 (src1) forward by n << 2 bytes so that the pre-decrementing load
// below starts from the far end.
77 "add %[s1], %[s1], %[n], lsl #2;"
79 "ldmia %[s0]!, {r0, r1};"
// ldmdb decrements before loading: r4 receives the lower-addressed word and
// r5 the higher-addressed one.
80 "ldmdb %[s1]!, {r4, r5};"
// Cross the pair: r0 * r5, then r1 * r4 (high words land in r9).
81 "smull r8, r9, r0, r5;"
83 "smull r8, r9, r1, r4;"
85 "stmia %[dst]!, {r0, r1};"
86 "subs %[n], %[n], #2;"
// All operands are read-write (+r): pointers and count are updated in place.
88 : [s0
] "+r" (src0
), [s1
] "+r" (src1
), [dst
] "+r" (dst
), [n
] "+r" (len
)
// Scratch registers, plus memory (stores through dst) and cc (set by subs).
90 : "r0", "r1", "r4", "r5", "r8", "r9", "memory", "cc");
93 #elif defined(CPU_COLDFIRE)
// vector_fmul_add_add, inline-assembly variant selected by the CPU_COLDFIRE
// test just above. The visible instructions handle four longwords per pass:
// multiply-accumulate src0 with src1 into acc0-acc3, move the accumulators
// out with movclr.l, load four longwords from src2, and store d0-d3 to dst,
// stepping every pointer by 16 bytes.
// NOTE(review): the numbering skips 96, 98-100, 115-118, 121, 122 and 124, so
// the asm opener, the loop label, the step that combines the src2 values
// with d0-d3, and the count update and branch are not visible in this
// listing - confirm against the full file.
95 void vector_fmul_add_add(real_t
*dst
, const real_t
*src0
, const real_t
*src1
, const real_t
*src2
, int len
)
97 /* Block sizes are always power of two. Smallest block is always way bigger
// Load four longwords from src0 into d0-d3 and four from src1 into d4, d5,
// a0 and a1.
101 "movem.l (%[src0]), %%d0-%%d3;"
102 "movem.l (%[src1]), %%d4-%%d5/%%a0-%%a1;"
// One multiply-accumulate per element, each into its own accumulator.
103 "mac.l %%d0, %%d4, %%acc0;"
104 "mac.l %%d1, %%d5, %%acc1;"
105 "mac.l %%d2, %%a0, %%acc2;"
106 "mac.l %%d3, %%a1, %%acc3;"
107 "lea.l (16, %[src0]), %[src0];"
108 "lea.l (16, %[src1]), %[src1];"
// movclr.l copies each accumulator into a data register and clears it.
109 "movclr.l %%acc0, %%d0;"
110 "movclr.l %%acc1, %%d1;"
111 "movclr.l %%acc2, %%d2;"
112 "movclr.l %%acc3, %%d3;"
// d4, d5, a0 and a1 are reused for four longwords from src2.
113 "movem.l (%[src2]), %%d4-%%d5/%%a0-%%a1;"
114 "lea.l (16, %[src2]), %[src2];"
119 "movem.l %%d0-%%d3, (%[dst]);"
120 "lea.l (16, %[dst]), %[dst];"
// Pointers are bound to address registers (+a) and len to a data register
// (+d); all are read-write operands.
123 : [src0
] "+a" (src0
), [src1
] "+a" (src1
), [src2
] "+a" (src2
), [dst
] "+a" (dst
), [n
] "+d" (len
)
// Registers named in the asm text, plus memory (stores through dst) and cc.
125 : "d0", "d1", "d2", "d3", "d4", "d5", "a0", "a1", "memory", "cc");
// vector_fmul_reverse, Coldfire inline-assembly variant. src0 is walked
// upwards and src1 downwards, four longwords per pass, with the products
//     d0*a1, d1*a0, d2*d5, d3*d4
// so the first src0 element of a group meets the last src1 element of the
// matching group.
// NOTE(review): the numbering skips 130, 131, 133, 134, 136, 151 and 152; the
// end of the parameter list, the asm opener, the loop label and the count
// update and branch are not visible in this listing.
129 void vector_fmul_reverse(real_t
*dst
, const real_t
*src0
, const real_t
*src1
,
132 /* Block sizes are always power of two. Smallest block is always way bigger
// Point s1 at s1 + n*4 - 16 bytes, i.e. the last group of four longwords.
135 "lea.l (-16, %[s1], %[n]*4), %[s1];"
137 "movem.l (%[s0]), %%d0-%%d3;"
138 "movem.l (%[s1]), %%d4-%%d5/%%a0-%%a1;"
// Pair the src0 registers with the src1 registers in opposite order.
139 "mac.l %%d0, %%a1, %%acc0;"
140 "mac.l %%d1, %%a0, %%acc1;"
141 "mac.l %%d2, %%d5, %%acc2;"
142 "mac.l %%d3, %%d4, %%acc3;"
// s0 steps forward by 16 bytes while s1 steps back by 16 bytes.
143 "lea.l (16, %[s0]), %[s0];"
144 "lea.l (-16, %[s1]), %[s1];"
// Move the four accumulators out (clearing them) and store them to dst.
145 "movclr.l %%acc0, %%d0;"
146 "movclr.l %%acc1, %%d1;"
147 "movclr.l %%acc2, %%d2;"
148 "movclr.l %%acc3, %%d3;"
149 "movem.l %%d0-%%d3, (%[dst]);"
150 "lea.l (16, %[dst]), %[dst];"
// Pointers are bound to address registers (+a) and len to a data register
// (+d); all are read-write operands.
153 : [s0
] "+a" (src0
), [s1
] "+a" (src1
), [dst
] "+a" (dst
), [n
] "+d" (len
)
// Empty input-operand section followed by the clobber list.
154 : : "d0", "d1", "d2", "d3", "d4", "d5", "a0", "a1", "memory", "cc");
// Portable C variant of vector_fmul_add_add (static inline). The visible
// statement computes, for index i,
//     dst[i] = MUL_F(src0[i], src1[i]) + src2[i]
// MUL_F is a project macro defined elsewhere.
// NOTE(review): lines 159, 160 and 162 are absent from this listing, so the
// declaration of i and the loop over len are not visible; presumably i runs
// from 0 to len-1 - confirm against the full file.
158 static inline void vector_fmul_add_add(real_t
*dst
, const real_t
*src0
, const real_t
*src1
, const real_t
*src2
, int len
){
161 dst
[i
] = MUL_F(src0
[i
], src1
[i
]) + src2
[i
];
// Portable C variant of vector_fmul_reverse (static inline). The visible
// statement computes dst[i] = MUL_F(src0[i], src1[-i]), so src1 is read at
// decreasing addresses as i grows.
// NOTE(review): lines 165-167 are absent from this listing; presumably they
// declare i, move src1 to its last element and open the loop over len -
// confirm against the full file.
164 static inline void vector_fmul_reverse(real_t
*dst
, const real_t
*src0
, const real_t
*src1
, int len
){
168 dst
[i
] = MUL_F(src0
[i
], src1
[-i
]);
// MDCT helper: passes in_data and out_data to faad_mdct together with an
// mdct_info pointer.
// NOTE(review): lines 176-194 are absent from this listing. The pointer is
// initialised to NULL, so the missing lines presumably choose one of the
// transform descriptors reachable through fb according to len - confirm
// against the full file, including what happens for an unexpected len.
173 static INLINE
void mdct(fb_info
*fb
, real_t
*in_data
, real_t
*out_data
, uint16_t len
)
175 mdct_info
*mdct
= NULL
;
195 faad_mdct(mdct
, in_data
, out_data
);
// File-scope scratch buffer of 2*1024 real_t values. ifilter_bank clears it
// on every call and uses it as the destination of mdct_backward. ALIGN and
// IBSS_ATTR are project macros defined elsewhere.
199 ALIGN real_t transf_buf
[2*1024] IBSS_ATTR
;
// Inverse filter bank: transforms one frame of spectral data (freq_in) with
// mdct_backward into transf_buf, windows the result, adds the overlap saved
// from the previous frame to produce time_out, and stores the windowed
// second half in overlap for the next call.
//   window_sequence    selects one of the four cases of the switch below
//   window_shape       0 selects the sine tables, any other value the KBD ones
//   window_shape_prev  same choice for the windows applied to the first half
//   freq_in            input coefficients
//   time_out           receives the output samples of this frame
//   overlap            read as the previous tail, rewritten with the new one
//   object_type        compared against LD
//   frame_len          gives nlong; nshort is frame_len/8
// NOTE(review): the window tables (sine_long_1024, kbd_short_128 and so on)
// and the transform sizes 2048 and 256 are literals, while nlong and nshort
// are derived from frame_len, so this presumably expects frame_len == 1024.
// NOTE(review): this listing has gaps; braces, else lines, break statements
// and preprocessor directives of the function are not visible.
201 void ifilter_bank(uint8_t window_sequence
, uint8_t window_shape
,
202 uint8_t window_shape_prev
, real_t
*freq_in
,
203 real_t
*time_out
, real_t
*overlap
,
204 uint8_t object_type
, uint16_t frame_len
)
208 const real_t
*window_long
= NULL
;
209 const real_t
*window_long_prev
= NULL
;
210 const real_t
*window_short
= NULL
;
211 const real_t
*window_short_prev
= NULL
;
213 uint16_t nlong
= frame_len
;
214 uint16_t nshort
= frame_len
/8;
215 uint16_t trans
= nshort
/2;
// Length of each flat region on either side of a short window inside a long
// block.
217 uint16_t nflat_ls
= (nlong
-nshort
)/2;
// Timestamp taken at entry; it is subtracted from a second sample at the end
// of the function.
220 int64_t count
= faad_get_ts();
// Clear the whole scratch buffer before transforming.
223 memset(transf_buf
,0,sizeof(transf_buf
));
224 /* select windows of current frame and previous frame (Sine or KBD) */
// NOTE(review): fb is not a parameter in the visible signature, so this LD
// branch presumably sits inside a conditional-compilation block whose
// directives are missing from the listing.
226 if (object_type
== LD
)
228 window_long
= fb
->ld_window
[window_shape
];
229 window_long_prev
= fb
->ld_window
[window_shape_prev
];
235 /*AAC uses two different window shapes depending on spectral features*/
236 if(window_shape
== 0){
237 window_long
= sine_long_1024
;
238 window_short
= sine_short_128
;
240 window_long
= kbd_long_1024
;
241 window_short
= kbd_short_128
;
244 if(window_shape_prev
== 0){
245 window_long_prev
= sine_long_1024
;
246 window_short_prev
= sine_short_128
;
248 window_long_prev
= kbd_long_1024
;
249 window_short_prev
= kbd_short_128
;
// Dump of the 1024 input coefficients and of the window parameters.
// NOTE(review): presumably inside a disabled debug block; the surrounding
// directives are not visible here.
257 for (i
= 0; i
< 1024; i
++)
259 printf("%d\n", freq_in
[i
]);
264 printf("%d %d\n", window_sequence
, window_shape
);
266 switch (window_sequence
)
268 case ONLY_LONG_SEQUENCE
:
270 mdct_backward(2048, freq_in
, transf_buf
);
272 /* add second half output of previous frame to windowed output of current frame */
273 vector_fmul_add_add(time_out
, transf_buf
, window_long_prev
, overlap
, nlong
);
275 /* window the second half and save as overlap for next frame */
276 vector_fmul_reverse(overlap
, transf_buf
+nlong
, window_long
, nlong
);
280 case LONG_START_SEQUENCE
:
282 mdct_backward(2048, freq_in
, transf_buf
);
284 /* add second half output of previous frame to windowed output of current frame */
285 vector_fmul_add_add(time_out
, transf_buf
, window_long_prev
, overlap
, nlong
);
287 /* window the second half and save as overlap for next frame */
288 /* construct second half window using padding with 1's and 0's */
// Flat region with weight 1: copied unchanged.
290 memcpy(overlap
, transf_buf
+nlong
, nflat_ls
*sizeof(real_t
));
// Falling slope: short window applied in reverse over nshort samples.
292 vector_fmul_reverse(overlap
+nflat_ls
, transf_buf
+nlong
+nflat_ls
, window_short
, nshort
);
// Region with weight 0: cleared.
294 memset(overlap
+nflat_ls
+nshort
, 0, nflat_ls
*sizeof(real_t
));
297 case EIGHT_SHORT_SEQUENCE
:
298 /*this could be assemblerized too, but this case is extremely uncommon*/
300 /* perform iMDCT for each short block */
// Short block k reads freq_in + k*nshort and writes to transf_buf +
// 2*nshort*k.
301 mdct_backward(256, freq_in
+0*nshort
, transf_buf
+2*nshort
*0);
302 mdct_backward(256, freq_in
+1*nshort
, transf_buf
+2*nshort
*1);
303 mdct_backward(256, freq_in
+2*nshort
, transf_buf
+2*nshort
*2);
304 mdct_backward(256, freq_in
+3*nshort
, transf_buf
+2*nshort
*3);
305 mdct_backward(256, freq_in
+4*nshort
, transf_buf
+2*nshort
*4);
306 mdct_backward(256, freq_in
+5*nshort
, transf_buf
+2*nshort
*5);
307 mdct_backward(256, freq_in
+6*nshort
, transf_buf
+2*nshort
*6);
308 mdct_backward(256, freq_in
+7*nshort
, transf_buf
+2*nshort
*7);
310 /* add second half output of previous frame to windowed output of current frame */
// The first nflat_ls samples come straight from the previous overlap.
311 for (i
= 0; i
< nflat_ls
; i
++)
312 time_out
[i
] = overlap
[i
];
// Each following output segment adds the falling half of one short block
// (window read backwards) to the rising half of the next (window read
// forwards); the very first segment uses window_short_prev.
313 for(i
= 0; i
< nshort
; i
++)
315 time_out
[nflat_ls
+ i
] = overlap
[nflat_ls
+ i
] + MUL_F(transf_buf
[nshort
*0+i
],window_short_prev
[i
]);
316 time_out
[nflat_ls
+1*nshort
+i
] = overlap
[nflat_ls
+nshort
*1+i
] + MUL_F(transf_buf
[nshort
*1+i
],window_short
[nshort
-1-i
]) + MUL_F(transf_buf
[nshort
*2+i
],window_short
[i
]);
317 time_out
[nflat_ls
+2*nshort
+i
] = overlap
[nflat_ls
+nshort
*2+i
] + MUL_F(transf_buf
[nshort
*3+i
],window_short
[nshort
-1-i
]) + MUL_F(transf_buf
[nshort
*4+i
],window_short
[i
]);
318 time_out
[nflat_ls
+3*nshort
+i
] = overlap
[nflat_ls
+nshort
*3+i
] + MUL_F(transf_buf
[nshort
*5+i
],window_short
[nshort
-1-i
]) + MUL_F(transf_buf
[nshort
*6+i
],window_short
[i
]);
// NOTE(review): trans (nshort/2) is declared above but not used in any
// visible line; lines 319, 325 and 326 are absent from this listing and
// presumably hold the guards that split this segment between time_out and
// overlap - confirm against the full file.
320 time_out
[nflat_ls
+4*nshort
+i
] = overlap
[nflat_ls
+nshort
*4+i
] + MUL_F(transf_buf
[nshort
*7+i
],window_short
[nshort
-1-i
]) + MUL_F(transf_buf
[nshort
*8+i
],window_short
[i
]);
323 /* window the second half and save as overlap for next frame */
// Indices are rebased by -nlong so the stored tail starts at the beginning
// of overlap.
324 for(i
= 0; i
< nshort
; i
++)
327 overlap
[nflat_ls
+4*nshort
+i
-nlong
] = MUL_F(transf_buf
[nshort
*7+i
],window_short
[nshort
-1-i
]) + MUL_F(transf_buf
[nshort
*8+i
],window_short
[i
]);
328 overlap
[nflat_ls
+5*nshort
+i
-nlong
] = MUL_F(transf_buf
[nshort
*9+i
],window_short
[nshort
-1-i
]) + MUL_F(transf_buf
[nshort
*10+i
],window_short
[i
]);
329 overlap
[nflat_ls
+6*nshort
+i
-nlong
] = MUL_F(transf_buf
[nshort
*11+i
],window_short
[nshort
-1-i
]) + MUL_F(transf_buf
[nshort
*12+i
],window_short
[i
]);
330 overlap
[nflat_ls
+7*nshort
+i
-nlong
] = MUL_F(transf_buf
[nshort
*13+i
],window_short
[nshort
-1-i
]) + MUL_F(transf_buf
[nshort
*14+i
],window_short
[i
]);
// The last short block has no successor, so only its falling half is stored.
331 overlap
[nflat_ls
+8*nshort
+i
-nlong
] = MUL_F(transf_buf
[nshort
*15+i
],window_short
[nshort
-1-i
]);
// Clear the final nflat_ls entries of overlap.
333 memset(overlap
+nflat_ls
+nshort
, 0, nflat_ls
*sizeof(real_t
));
337 case LONG_STOP_SEQUENCE
:
339 mdct_backward(2048, freq_in
, transf_buf
);
341 /* add second half output of previous frame to windowed output of current frame */
342 /* construct first half window using padding with 1's and 0's */
// Region with weight 0: output is the previous overlap alone.
343 memcpy(time_out
, overlap
, nflat_ls
*sizeof(real_t
));
// Rising slope: previous short window applied over nshort samples.
345 vector_fmul_add_add(time_out
+nflat_ls
, transf_buf
+nflat_ls
, window_short_prev
, overlap
+nflat_ls
, nshort
);
// Region with weight 1: plain sum of overlap and transform output.
347 for (i
= 0; i
< nflat_ls
; i
++)
348 time_out
[nflat_ls
+nshort
+i
] = overlap
[nflat_ls
+nshort
+i
] + transf_buf
[nflat_ls
+nshort
+i
];
350 /* window the second half and save as overlap for next frame */
351 vector_fmul_reverse(overlap
, transf_buf
+nlong
, window_long
, nlong
);
// Dump of the 1024 output samples. NOTE(review): presumably inside a
// disabled debug block; the surrounding directives are not visible here.
356 for (i
= 0; i
< 1024; i
++)
358 printf("%d\n", time_out
[i
]);
359 //printf("0x%.8X\n", time_out[i]);
// Elapsed time since the sample taken at entry. NOTE(review): how count is
// consumed afterwards is not visible in this listing.
365 count
= faad_get_ts() - count
;
// File-scope, zero-initialised work buffer of 2*1024 real_t values.
// filter_bank_ltp clears it, fills it with the windowed input and passes it
// to mdct. ALIGN is a project macro defined elsewhere.
372 ALIGN real_t windowed_buf
[2*1024] = {0};
373 /* only works for LTP -> no overlapping, no short blocks */
374 void filter_bank_ltp(fb_info
*fb
, uint8_t window_sequence
, uint8_t window_shape
,
375 uint8_t window_shape_prev
, real_t
*in_data
, real_t
*out_mdct
,
376 uint8_t object_type
, uint16_t frame_len
)
380 const real_t
*window_long
= NULL
;
381 const real_t
*window_long_prev
= NULL
;
382 const real_t
*window_short
= NULL
;
383 const real_t
*window_short_prev
= NULL
;
385 uint16_t nlong
= frame_len
;
386 uint16_t nshort
= frame_len
/8;
387 uint16_t nflat_ls
= (nlong
-nshort
)/2;
389 //assert(window_sequence != EIGHT_SHORT_SEQUENCE);
391 memset(windowed_buf
,0,sizeof(windowed_buf
));
393 if (object_type
== LD
)
395 window_long
= fb
->ld_window
[window_shape
];
396 window_long_prev
= fb
->ld_window
[window_shape_prev
];
401 window_long
= fb
->long_window
[window_shape
];
402 window_long_prev
= fb
->long_window
[window_shape_prev
];
403 window_short
= fb
->short_window
[window_shape
];
404 window_short_prev
= fb
->short_window
[window_shape_prev
];
409 switch(window_sequence
)
411 case ONLY_LONG_SEQUENCE
:
412 for (i
= nlong
-1; i
>= 0; i
--)
414 windowed_buf
[i
] = MUL_F(in_data
[i
], window_long_prev
[i
]);
415 windowed_buf
[i
+nlong
] = MUL_F(in_data
[i
+nlong
], window_long
[nlong
-1-i
]);
417 mdct(fb
, windowed_buf
, out_mdct
, 2*nlong
);
420 case LONG_START_SEQUENCE
:
421 for (i
= 0; i
< nlong
; i
++)
422 windowed_buf
[i
] = MUL_F(in_data
[i
], window_long_prev
[i
]);
423 for (i
= 0; i
< nflat_ls
; i
++)
424 windowed_buf
[i
+nlong
] = in_data
[i
+nlong
];
425 for (i
= 0; i
< nshort
; i
++)
426 windowed_buf
[i
+nlong
+nflat_ls
] = MUL_F(in_data
[i
+nlong
+nflat_ls
], window_short
[nshort
-1-i
]);
427 for (i
= 0; i
< nflat_ls
; i
++)
428 windowed_buf
[i
+nlong
+nflat_ls
+nshort
] = 0;
429 mdct(fb
, windowed_buf
, out_mdct
, 2*nlong
);
432 case LONG_STOP_SEQUENCE
:
433 for (i
= 0; i
< nflat_ls
; i
++)
435 for (i
= 0; i
< nshort
; i
++)
436 windowed_buf
[i
+nflat_ls
] = MUL_F(in_data
[i
+nflat_ls
], window_short_prev
[i
]);
437 for (i
= 0; i
< nflat_ls
; i
++)
438 windowed_buf
[i
+nflat_ls
+nshort
] = in_data
[i
+nflat_ls
+nshort
];
439 for (i
= 0; i
< nlong
; i
++)
440 windowed_buf
[i
+nlong
] = MUL_F(in_data
[i
+nlong
], window_long
[nlong
-1-i
]);
441 mdct(fb
, windowed_buf
, out_mdct
, 2*nlong
);