1 /***************************************************************************
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
10 * Copyright (C) 2006 by David Bryant
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version 2
15 * of the License, or (at your option) any later version.
17 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
18 * KIND, either express or implied.
20 ****************************************************************************/
22 /* This is an assembly optimized version of the following WavPack function:
24 * void decorr_stereo_pass_cont_arml (struct decorr_pass *dpp,
25 * long *buffer, long sample_count);
27 * It performs a single pass of stereo decorrelation on the provided buffer.
28 * Note that this version of the function requires that the 8 previous stereo
29 * samples are visible and correct. In other words, it ignores the "samples_*"
30 * fields in the decorr_pass structure and gets the history data directly
31 * from the buffer. It does, however, return the appropriate history samples
32 * to the decorr_pass structure before returning.
34 * This is written to work on an ARM7TDMI processor. This version uses the
35 * 64-bit multiply-accumulate instruction and so can be used with all
36 * WavPack files. However, for optimum performance with 16-bit WavPack
37 * files, there is a faster version that only uses the 32-bit MLA
43 .global decorr_stereo_pass_cont_arml
48 * r0 = struct decorr_pass *dpp
50 * r2 = long sample_count
53 decorr_stereo_pass_cont_arml:
@ Entry/setup: saves registers, loads delta and both weights from the struct
@ passed in r0, computes the end-of-buffer pointer and preloads history words.
@ NOTE(review): this listing skips original lines (the embedded line numbers
@ jump), so the branches that dispatch on dpp->term are not visible here.
55 stmfd sp!, {r4 - r8, r10, r11, lr} @ push r4-r8, r10, r11 and lr; the final ldmfd pops them (lr into pc)
57 mov r11, #512 @ r11 = 512 for rounding
58 ldrsh r6, [r0, #2] @ r6 = dpp->delta (signed 16-bit load)
59 ldrsh r4, [r0, #4] @ r4 = dpp->weight_A (signed 16-bit load)
60 ldrsh r0, [r0, #6] @ r0 = dpp->weight_B (r0 no longer holds dpp after this)
61 cmp r2, #0 @ exit if no samples to process
64 mov r0, r0, asl #18 @ for 64-bit math we use weights << 18
67 add r7, r1, r2, asl #3 @ r7 = buffer ending position (r1 + sample_count * 8 bytes)
68 ldrsh r2, [r5, #0] @ r2 = dpp->term (r5 is used as the dpp pointer; its setup is not shown here)
72 ldr lr, [r1, #-16] @ load 2 sample history from buffer (lr = word at r1 - 16)
73 ldr r10, [r1, #-12] @ for terms 2, 17, and 18 (r10 = word at r1 - 12)
88 mov r10, #(1024 << 18) @ r10 = -1024 << 18 for weight clipping
89 rsb r10, r10, #0 @ r10 = 0 - r10 (only used for negative terms)
99 ******************************************************************************
100 * Loop to handle term = 17 condition
102 * r0 = dpp->weight_B r8 = previous left sample
104 * r2 = current sample r10 = second previous right sample << 4
105 * r3 = previous right sample r11 = lo accumulator (for rounding)
106 * r4 = dpp->weight_A ip = current decorrelation value
108 * r6 = dpp->delta lr = second previous left sample << 4
110 *******************************************************************************
@ Term 17: the left channel is handled first, then the right channel at .L325.
@ NOTE(review): several original lines are absent from this listing (embedded
@ line numbers skip), including the weight updates and the loop-back branch.
114 rsbs ip, lr, r8, asl #5 @ decorr value = (2 * prev) - 2nd prev; computed as (r8 << 5) - lr, S suffix sets flags
115 mov lr, r8, asl #4 @ previous becomes 2nd previous (kept << 4)
116 ldr r2, [r1], #4 @ get sample & update pointer
119 smlalne r11, r8, r4, ip @ if NE: 64-bit signed multiply-accumulate, r8:r11 += r4 (weight_A) * ip
120 strne r8, [r1, #-4] @ if change possible, store sample back
123 teq ip, r2 @ update weight based on signs
127 .L325: rsbs ip, r10, r3, asl #5 @ do same thing for right channel: (r3 << 5) - r10
132 smlalne r11, r3, r0, ip @ if NE: r3:r11 += r0 (weight_B) * ip
140 .L329: cmp r7, r1 @ loop back if more samples to do (r7 = buffer end)
144 b store_1718 @ common exit for terms 17 & 18
147 ******************************************************************************
148 * Loop to handle term = 18 condition
150 * r0 = dpp->weight_B r8 = previous left sample
152 * r2 = current sample r10 = second previous right sample
153 * r3 = previous right sample r11 = lo accumulator (for rounding)
154 * r4 = dpp->weight_A ip = decorrelation value
156 * r6 = dpp->delta lr = second previous left sample
158 *******************************************************************************
@ Term 18: the left channel is handled first, then the right channel at .L337.
@ NOTE(review): several original lines are absent from this listing (embedded
@ line numbers skip), so the sequence below is not complete.
162 rsb ip, lr, r8 @ decorr value = (ip = prev - 2nd prev)
163 mov lr, r8 @ ((3 * prev) - 2nd prev) >> 1 (lr now holds prev)
164 add ip, lr, ip, asr #1 @ ip = prev + ((prev - 2nd prev) >> 1)
166 ldr r2, [r1], #4 @ get sample & update pointer
169 smlalne r11, r8, r4, ip @ if NE: 64-bit signed multiply-accumulate, r8:r11 += r4 (weight_A) * ip
170 strne r8, [r1, #-4] @ if change possible, store sample back
173 teq ip, r2 @ update weight based on signs
177 .L337: rsb ip, r10, r3 @ do same thing for right channel: ip = r3 - r10
179 add ip, r10, ip, asr #1 @ ip = r10 + (ip >> 1); NOTE(review): original line 178 is not shown, presumably it updates r10 first
184 smlalne r11, r3, r0, ip @ if NE: r3:r11 += r0 (weight_B) * ip
192 .L341: cmp r7, r1 @ loop back if more samples to do (r7 = buffer end)
195 /* common exit for terms 17 & 18 */
198 str r3, [r5, #40] @ store sample history into struct (r3 written to the word at r5 + 40)
202 b common_exit @ and return
205 ******************************************************************************
206 * Loop to handle term = 2 condition
207 * (note that this case can be handled by the default term handler (1-8), but
208 * this special case is faster because it doesn't have to read memory twice)
210 * r0 = dpp->weight_B r8 = previous left sample
212 * r2 = current sample r10 = second previous right sample << 4
213 * r3 = previous right sample r11 = lo accumulator (for rounding)
214 * r4 = dpp->weight_A ip = decorrelation value
216 * r6 = dpp->delta lr = second previous left sample << 4
218 *******************************************************************************
@ Term 2: the decorrelation value is the sample from two positions back, kept
@ in registers (lr for left, r10 for right) instead of being re-read from memory.
@ NOTE(review): several original lines are absent from this listing.
222 movs ip, lr @ get decorrelation value & test (S suffix sets flags for the NE ops)
223 ldr r2, [r1], #4 @ get sample & update pointer
224 mov lr, r8, asl #4 @ previous becomes 2nd previous (kept << 4)
227 smlalne r11, r8, r4, ip @ if NE: 64-bit signed multiply-accumulate, r8:r11 += r4 (weight_A) * ip
228 strne r8, [r1, #-4] @ if change possible, store sample back
231 teq ip, r2 @ update weight based on signs
235 .L225: movs ip, r10 @ do same thing for right channel
240 smlalne r11, r3, r0, ip @ if NE: r3:r11 += r0 (weight_B) * ip
248 .L229: cmp r7, r1 @ loop back if more samples to do (r7 = buffer end)
251 b default_term_exit @ this exit updates all dpp->samples
254 ******************************************************************************
255 * Loop to handle default term condition
257 * r0 = dpp->weight_B r8 = result accumulator
259 * r2 = dpp->term r10 =
260 * r3 = decorrelation value r11 = lo accumulator (for rounding)
261 * r4 = dpp->weight_A ip = current sample
263 * r6 = dpp->delta lr =
265 *******************************************************************************
@ Default term: the decorrelation value is read from the buffer r2 (term)
@ stereo samples back, i.e. at byte offset -(term * 8) from the current pointer.
@ NOTE(review): several original lines are absent from this listing.
269 ldr r3, [r1, -r2, asl #3] @ get decorrelation value based on term (word at r1 - term * 8)
270 ldr ip, [r1], #4 @ get original sample and bump ptr
274 smlalne r11, r8, r4, r3 @ if NE: 64-bit signed multiply-accumulate, r8:r11 += r4 (weight_A) * r3
275 strne r8, [r1, #-4] @ if possibly changed, store updated sample
278 teq ip, r3 @ update weight based on signs
282 .L350: ldr r3, [r1, -r2, asl #3] @ do the same thing for right channel (r1 has advanced by 4)
287 smlalne r11, r8, r0, r3 @ if NE: r8:r11 += r0 (weight_B) * r3
295 .L354: cmp r7, r1 @ loop back if more samples to do
296 bhi term_default_loop @ unsigned compare: loop while buffer end r7 > r1
299 * This exit is used by terms 1-8 to store the previous 8 samples into the decorr
300 * structure (even if they are not all used for the given term)
308 .L358: and r3, ip, #7 @ r3 = ip modulo 8 (NOTE(review): ip is set by lines not shown in this listing)
309 add r3, r5, r3, asl #2 @ r3 = r5 + r3 * 4, a word-sized slot address relative to r5
321 ******************************************************************************
322 * Loop to handle term = -1 condition
324 * r0 = dpp->weight_B r8 =
326 * r2 = intermediate result r10 = -1024 << 18 (for clipping)
327 * r3 = previous right sample r11 = lo accumulator (for rounding)
328 * r4 = dpp->weight_A ip = current sample
330 * r6 = dpp->delta lr = updated left sample
332 *******************************************************************************
@ Term -1: left channel first, then the right channel at .L361.
@ NOTE(review): several original lines are absent from this listing, including
@ the lower-bound half of each weight clip.
339 ldr ip, [r1], #8 @ for left channel the decorrelation value (load word, then advance r1 by 8)
340 movs r3, r3, asl #4 @ is the previous right sample (in r3); S suffix sets flags
343 smlalne r11, lr, r4, r3 @ if NE: 64-bit signed multiply-accumulate, lr:r11 += r4 (weight_A) * r3
347 teq ip, r3 @ update weight based on signs
350 cmp r4, #(1024 << 18) @ upper clip for weight_A
351 movgt r4, #(1024 << 18) @ if signed greater, cap r4 at 1024 << 18
355 .L361: ldr r2, [r1, #-4] @ for right channel the decorrelation value (r2 = word at r1 - 4)
359 smlalne r11, r3, r0, lr @ if NE: r3:r11 += r0 (weight_B) * lr
366 cmp r0, #(1024 << 18) @ then clip weight to +/-1024
367 movgt r0, #(1024 << 18) @ if signed greater, cap r0 at 1024 << 18
371 .L369: cmp r7, r1 @ loop back if more samples to do
372 bhi term_minus_1_loop @ unsigned compare: loop while buffer end r7 > r1
374 str r3, [r5, #8] @ else store right sample and exit (written to the word at r5 + 8)
378 ******************************************************************************
379 * Loop to handle term = -2 condition
380 * (note that the channels are processed in the reverse order here)
382 * r0 = dpp->weight_B r8 =
384 * r2 = intermediate result r10 = -1024 << 18 (for clipping)
385 * r3 = previous left sample r11 = lo accumulator (for rounding)
386 * r4 = dpp->weight_A ip = current sample
388 * r6 = dpp->delta lr = updated right sample
390 *******************************************************************************
@ Term -2: right channel first, then the left channel at .L380.
@ NOTE(review): several original lines are absent from this listing, including
@ the lower-bound half of each weight clip.
397 ldr ip, [r1, #4] @ for right channel the decorrelation value (ip = word at r1 + 4, r1 unchanged)
398 movs r3, r3, asl #4 @ is the previous left sample (in r3); S suffix sets flags
401 smlalne r11, lr, r0, r3 @ if NE: 64-bit signed multiply-accumulate, lr:r11 += r0 (weight_B) * r3
405 teq ip, r3 @ update weight based on signs
408 cmp r0, #(1024 << 18) @ then clip weight to +/-1024
409 movgt r0, #(1024 << 18) @ if signed greater, cap r0 at 1024 << 18
413 .L380: ldr r2, [r1], #8 @ for left channel the decorrelation value (load word, then advance r1 by 8)
417 smlalne r11, r3, r4, lr @ if NE: r3:r11 += r4 (weight_A) * lr
424 cmp r4, #(1024 << 18) @ upper clip for weight_A
425 movgt r4, #(1024 << 18) @ if signed greater, cap r4 at 1024 << 18
429 .L388: cmp r7, r1 @ loop back if more samples to do
430 bhi term_minus_2_loop @ unsigned compare: loop while buffer end r7 > r1
432 str r3, [r5, #40] @ else store left channel and exit (written to the word at r5 + 40)
436 ******************************************************************************
437 * Loop to handle term = -3 condition
439 * r0 = dpp->weight_B r8 = previous left sample
441 * r2 = current left sample r10 = -1024 << 18 (for clipping)
442 * r3 = previous right sample r11 = lo accumulator (for rounding)
443 * r4 = dpp->weight_A ip = intermediate result
445 * r6 = dpp->delta lr =
447 *******************************************************************************
@ Term -3: left channel first, then the right channel at .L399.
@ NOTE(review): several original lines are absent from this listing, including
@ the lower-bound half of each weight clip.
451 ldr r3, [r1, #-4] @ load previous samples (r3 = word at r1 - 4)
459 smlalne r11, r2, r4, r3 @ if NE: 64-bit signed multiply-accumulate, r2:r11 += r4 (weight_A) * r3
463 teq ip, r3 @ update weight based on signs
466 cmp r4, #(1024 << 18) @ then clip weight to +/-1024
467 movgt r4, #(1024 << 18) @ if signed greater, cap r4 at 1024 << 18
471 .L399: movs ip, r8, asl #4 @ ip = previous left we use now (r8 << 4; S suffix sets flags)
472 mov r8, r2 @ r8 = current left we use next time
476 smlalne r11, r3, r0, ip @ if NE: r3:r11 += r0 (weight_B) * ip
483 cmp r0, #(1024 << 18) @ upper clip for weight_B
484 movgt r0, #(1024 << 18) @ if signed greater, cap r0 at 1024 << 18
488 .L407: cmp r7, r1 @ loop back if more samples to do
489 bhi term_minus_3_loop @ unsigned compare: loop while buffer end r7 > r1
491 str r3, [r5, #8] @ else store previous samples & exit (r3 written to the word at r5 + 8)
495 * Before finally exiting we must store weights back for next time
499 mov r0, r0, asr #18 @ restore weights to real magnitude (undoes the << 18 applied at entry)
503 ldmfd sp!, {r4 - r8, r10, r11, pc} @ pop the registers pushed at entry; loading pc with the saved lr returns