1 /***************************************************************************
3 * Open \______ \ ____ ____ | | _\_ |__ _______ ___
4 * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
5 * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
6 * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
10 * Copyright (C) 2006 by David Bryant
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version 2
15 * of the License, or (at your option) any later version.
17 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
18 * KIND, either express or implied.
20 ****************************************************************************/
22 /* This is an assembly optimized version of the following WavPack function:
24 * void decorr_stereo_pass_cont_arm (struct decorr_pass *dpp,
25 * long *buffer, long sample_count);
27 * It performs a single pass of stereo decorrelation on the provided buffer.
28 * Note that this version of the function requires that the 8 previous stereo
29 * samples are visible and correct. In other words, it ignores the "samples_*"
30 * fields in the decorr_pass structure and gets the history data directly
31 * from the buffer. It does, however, return the appropriate history samples
32 * to the decorr_pass structure before returning.
34 * This is written to work on an ARM7TDMI processor. This version only uses the
35 * 32-bit multiply-accumulate instruction and so will overflow with 24-bit
40 .global decorr_stereo_pass_cont_arm
45 * r0 = struct decorr_pass *dpp
47 * r2 = long sample_count
@ Entry point: saves registers, loads delta / weight_A / weight_B / term from
@ the decorr_pass struct, computes the buffer end address and branches to a
@ term-specific loop. NOTE(review): several instructions of this sequence
@ (including the setup of r5 and the conditional branches) are not visible in
@ this excerpt.
50 decorr_stereo_pass_cont_arm:
52 stmfd sp!, {r4 - r8, r10, r11, lr} @ save registers; the final ldmfd pops the saved lr into pc
54 mov r11, #512 @ r11 = 512 for rounding (added by mla before the asr #10 shifts)
55 ldrsh r6, [r0, #2] @ r6 = dpp->delta (signed halfword at offset 2)
56 ldrsh r4, [r0, #4] @ r4 = dpp->weight_A (signed halfword at offset 4)
57 ldrsh r0, [r0, #6] @ r0 = dpp->weight_B (overwrites the dpp pointer in r0)
58 cmp r2, #0 @ exit if no samples to process
61 add r7, r1, r2, asl #3 @ r7 = buffer ending position (r1 + sample_count * 8)
62 ldrsh r2, [r5, #0] @ r2 = dpp->term (read through r5; r2 no longer holds the count)
66 ldr lr, [r1, #-16] @ load 2 sample history from buffer (lr = word at r1 - 16,
67 ldr r10, [r1, #-12] @ r10 = word at r1 - 12) for terms 2, 17, and 18
76 b term_default_loop @ else handle default (1-8, except 2)
79 mov r10, #1024 @ r10 = 1024 here, negated by the next instruction
80 rsb r10, r10, #0 @ r10 = -1024 for weight clipping (only used for negative terms)
90 ******************************************************************************
91 * Loop to handle term = 17 condition
93 * r0 = dpp->weight_B r8 = previous left sample
95 * r2 = current sample r10 = second previous right sample
96 * r3 = previous right sample r11 = 512 (for rounding)
97 * r4 = dpp->weight_A ip = current decorrelation value
99 * r6 = dpp->delta lr = second previous left sample
101 *******************************************************************************
@ Term 17 loop body: left channel first, then right channel.
@ NOTE(review): the loop label, the weight-update instructions and the
@ loop-back branch are not visible in this excerpt.
105 rsbs ip, lr, r8, asl #1 @ left decorr value = (2 * prev in r8) - 2nd prev in lr; sets flags
106 mov lr, r8 @ previous becomes 2nd previous
107 ldr r2, [r1], #4 @ get sample & update pointer (post-increment by 4)
108 mla r8, ip, r4, r11 @ mult decorr value by weight_A, round (+512 from r11),
109 add r8, r2, r8, asr #10 @ shift, and add to new sample
110 strne r8, [r1, #-4] @ if change possible (decorr value != 0 per the rsbs flags), store sample back
113 teq ip, r2 @ update weight based on signs
117 .L325: rsbs ip, r10, r3, asl #1 @ do same thing for right channel: (2 * prev in r3) - 2nd prev in r10
121 add r3, r2, r3, asr #10 @ r3 = r2 + (r3 >> 10); presumably the new right sample (loads not visible here)
129 .L329: cmp r7, r1 @ loop back if more samples to do (r7 = end position, r1 = current pointer)
131 b store_1718 @ common exit for terms 17 & 18
134 ******************************************************************************
135 * Loop to handle term = 18 condition
137 * r0 = dpp->weight_B r8 = previous left sample
139 * r2 = current sample r10 = second previous right sample
140 * r3 = previous right sample r11 = 512 (for rounding)
141 * r4 = dpp->weight_A ip = decorrelation value
143 * r6 = dpp->delta lr = second previous left sample
145 *******************************************************************************
@ Term 18 loop body (left channel, then right channel) followed by the exit
@ shared by terms 17 and 18.
@ NOTE(review): the loop label, the weight-update instructions, the loop-back
@ branch and part of the exit sequence are not visible in this excerpt.
149 sub ip, r8, lr @ decorr value =
150 mov lr, r8 @ ((3 * prev) - 2nd prev) >> 1
151 adds ip, r8, ip, asr #1 @ ip = prev + ((prev - 2nd prev) >> 1); sets flags for strne
152 ldr r2, [r1], #4 @ get sample & update pointer (post-increment by 4)
153 mla r8, ip, r4, r11 @ mult decorr value by weight_A, round (+512 from r11),
154 add r8, r2, r8, asr #10 @ shift, and add to new sample
155 strne r8, [r1, #-4] @ if change possible (decorr value != 0 per the adds flags), store sample back
158 teq ip, r2 @ update weight based on signs
162 .L337: sub ip, r3, r10 @ do same thing for right channel (prev in r3, 2nd prev in r10)
164 adds ip, r3, ip, asr #1 @ ip = prev + ((prev - 2nd prev) >> 1); sets flags
167 add r3, r2, r3, asr #10 @ r3 = r2 + (r3 >> 10); presumably the new right sample (loads not visible here)
175 .L341: cmp r7, r1 @ loop back if more samples to do (r7 = end position, r1 = current pointer)
178 /* common exit for terms 17 & 18 */
181 str r3, [r5, #40] @ store sample history into struct (r3 to offset 40 from r5)
185 b common_exit @ and return
188 ******************************************************************************
189 * Loop to handle term = 2 condition
190 * (note that this case can be handled by the default term handler (1-8), but
191 * this special case is faster because it doesn't have to read memory twice)
193 * r0 = dpp->weight_B r8 = previous left sample
195 * r2 = current sample r10 = second previous right sample
196 * r3 = previous right sample r11 = 512 (for rounding)
197 * r4 = dpp->weight_A ip = decorrelation value
199 * r6 = dpp->delta lr = second previous left sample
201 *******************************************************************************
@ Term 2 loop body: the decorrelation value is the second previous sample of
@ the same channel, kept in registers (lr for left, r10 for right).
@ NOTE(review): the loop label, the weight-update instructions and the
@ loop-back branch are not visible in this excerpt.
205 movs ip, lr @ get decorrelation value (2nd previous left) & test; sets flags
206 mov lr, r8 @ previous becomes 2nd previous
207 ldr r2, [r1], #4 @ get sample & update pointer (post-increment by 4)
208 mla r8, ip, r4, r11 @ mult decorr value by weight_A, round (+512 from r11),
209 add r8, r2, r8, asr #10 @ shift, and add to new sample
210 strne r8, [r1, #-4] @ if change possible (decorr value != 0 per the movs flags), store sample back
213 teq ip, r2 @ update weight based on signs
217 .L225: movs ip, r10 @ do same thing for right channel (2nd previous right in r10)
221 add r3, r2, r3, asr #10 @ r3 = r2 + (r3 >> 10); presumably the new right sample (loads not visible here)
229 .L229: cmp r7, r1 @ loop back if more samples to do (r7 = end position, r1 = current pointer)
231 b default_term_exit @ this exit updates all dpp->samples
234 ******************************************************************************
235 * Loop to handle default term condition
237 * r0 = dpp->weight_B r8 = result accumulator
239 * r2 = dpp->term r10 =
240 * r3 = decorrelation value r11 = 512 (for rounding)
241 * r4 = dpp->weight_A ip = current sample
243 * r6 = dpp->delta lr =
245 *******************************************************************************
@ Default-term loop body (left channel, then right channel), followed by the
@ start of the exit that saves sample history. The decorrelation value is read
@ from the buffer at a distance of term * 8 bytes behind the current word.
@ NOTE(review): the loop label, the weight-update instructions and most of the
@ exit sequence are not visible in this excerpt.
249 ldr ip, [r1] @ get original sample
250 ldr r3, [r1, -r2, asl #3] @ get decorrelation value based on term (word at r1 - term * 8)
251 mla r8, r3, r4, r11 @ mult decorr value by weight_A, round (+512 from r11),
252 add r8, ip, r8, asr #10 @ shift and add to new sample
253 str r8, [r1], #4 @ store updated sample and advance the pointer by 4
257 teq ip, r3 @ update weight based on signs
261 .L350: ldr ip, [r1] @ do the same thing for right channel
262 ldr r3, [r1, -r2, asl #3] @ right decorrelation value (word at r1 - term * 8)
264 add r8, ip, r8, asr #10 @ r8 = sample + (r8 >> 10)
273 .L354: cmp r7, r1 @ loop back if more samples to do
274 bhi term_default_loop @ unsigned compare: continue while r7 (end) is above r1
277 * This exit is used by terms 1-8 to store the previous 8 samples into the decorr
278 * structure (even if they are not all used for the given term)
286 .L358: and r3, ip, #7 @ r3 = ip & 7 (value wrapped into 0..7)
287 add r3, r5, r3, asl #2 @ r3 = r5 + r3 * 4 (word-sized offset from r5)
299 ******************************************************************************
300 * Loop to handle term = -1 condition
302 * r0 = dpp->weight_B r8 =
304 * r2 = intermediate result r10 = -1024 (for clipping)
305 * r3 = previous right sample r11 = 512 (for rounding)
306 * r4 = dpp->weight_A ip = current sample
308 * r6 = dpp->delta lr = updated left sample
310 *******************************************************************************
@ Term -1 loop body: the left channel is predicted from the previous right
@ sample, then the right channel from the just-updated left sample.
@ NOTE(review): the loop label, the stores, the weight updates and the rest of
@ the clipping sequence are not visible in this excerpt.
317 ldr ip, [r1] @ for left channel the decorrelation value
318 mla r2, r3, r4, r11 @ is the previous right sample (in r3)
319 add lr, ip, r2, asr #10 @ lr = updated left sample
324 teq ip, r3 @ update weight based on signs
332 .L361: ldr r2, [r1, #-4] @ for right channel the decorrelation value
333 mla r3, lr, r0, r11 @ is the just updated left sample (in lr)
334 add r3, r2, r3, asr #10 @ r3 = updated right sample (r2 + (r3 >> 10))
342 cmp r0, #1024 @ then clip weight to +/-1024
347 .L369: cmp r7, r1 @ loop back if more samples to do
348 bhi term_minus_1_loop @ unsigned compare: continue while r7 (end) is above r1
350 str r3, [r5, #8] @ else store right sample (to offset 8 from r5) and exit
354 ******************************************************************************
355 * Loop to handle term = -2 condition
356 * (note that the channels are processed in the reverse order here)
358 * r0 = dpp->weight_B r8 =
360 * r2 = intermediate result r10 = -1024 (for clipping)
361 * r3 = previous left sample r11 = 512 (for rounding)
362 * r4 = dpp->weight_A ip = current sample
364 * r6 = dpp->delta lr = updated right sample
366 *******************************************************************************
@ Term -2 loop body: channels are processed in reverse order. The right
@ channel is predicted from the previous left sample, then the left channel
@ from the just-updated right sample.
@ NOTE(review): the loop label, the stores, the weight updates and the rest of
@ the clipping sequence are not visible in this excerpt.
373 ldr ip, [r1, #4] @ for right channel the decorrelation value
374 mla r2, r3, r0, r11 @ is the previous left sample (in r3)
375 add lr, ip, r2, asr #10 @ lr = updated right sample
380 teq ip, r3 @ update weight based on signs
383 cmp r0, #1024 @ then clip weight to +/-1024
388 .L380: ldr r2, [r1, #0] @ for left channel the decorrelation value
389 mla r3, lr, r4, r11 @ is the just updated right sample (in lr)
390 add r3, r2, r3, asr #10 @ r3 = updated left sample (r2 + (r3 >> 10))
403 .L388: cmp r7, r1 @ loop back if more samples to do
404 bhi term_minus_2_loop @ unsigned compare: continue while r7 (end) is above r1
406 str r3, [r5, #40] @ else store left channel (to offset 40 from r5) and exit
410 ******************************************************************************
411 * Loop to handle term = -3 condition
413 * r0 = dpp->weight_B r8 = previous left sample
415 * r2 = current left sample r10 = -1024 (for clipping)
416 * r3 = previous right sample r11 = 512 (for rounding)
417 * r4 = dpp->weight_A ip = intermediate result
419 * r6 = dpp->delta lr =
421 *******************************************************************************
@ Term -3 setup and loop body, followed by the function's final return.
@ NOTE(review): the loop label, most loads/stores, the weight updates and the
@ code that writes the weights back are not visible in this excerpt.
425 ldr r3, [r1, #-4] @ load previous samples (r3 = word at r1 - 4)
431 add r2, ip, r2, asr #10 @ r2 = ip + (r2 >> 10); this value is kept as "current left" in r8 below
436 teq ip, r3 @ update weight based on signs
439 cmp r4, #1024 @ then clip weight to +/-1024
444 .L399: movs ip, r8 @ ip = previous left we use now; sets flags
445 mov r8, r2 @ r8 = current left we use next time
448 add r3, r2, r3, asr #10 @ r3 = r2 + (r3 >> 10); the loads feeding r2/r3 are not visible here
460 .L407: cmp r7, r1 @ loop back if more samples to do
461 bhi term_minus_3_loop @ unsigned compare: continue while r7 (end) is above r1
463 str r3, [r5, #8] @ else store previous samples & exit (r3 to offset 8 from r5)
467 * Before finally exiting we must store weights back for next time
473 ldmfd sp!, {r4 - r8, r10, r11, pc} @ restore the registers saved on entry; loading pc returns to the caller