apps/codecs/libwavpack/arm.S

   1 /***************************************************************************
   2  *             __________               __   ___.
   3  *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
   4  *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
   5  *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
   6  *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
   7  *                     \/            \/     \/    \/            \/
   8  * $Id$
   9  *
  10  * Copyright (C) 2006 by David Bryant
  11  *
  12  * This program is free software; you can redistribute it and/or
  13  * modify it under the terms of the GNU General Public License
  14  * as published by the Free Software Foundation; either version 2
  15  * of the License, or (at your option) any later version.
  16  *
  17  * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
  18  * KIND, either express or implied.
  19  *
  20  ****************************************************************************/
  21
  22 /* This is an assembly optimized version of the following WavPack function:
  23  *
  24  * void decorr_stereo_pass_cont_arm (struct decorr_pass *dpp,
  25  *                                   long *buffer, long sample_count);
  26  *
  27  * It performs a single pass of stereo decorrelation on the provided buffer.
  28  * Note that this version of the function requires that the 8 previous stereo
  29  * samples are visible and correct. In other words, it ignores the "samples_*"
  30  * fields in the decorr_pass structure and gets the history data directly
  31  * from the buffer. It does, however, return the appropriate history samples
  32  * to the decorr_pass structure before returning.
  33  *
  34  * This is written to work on an ARM7TDMI processor. This version only uses the
  35  * 32-bit multiply-accumulate instruction and so will overflow with 24-bit
  36  * WavPack files.
  37  */
  38         .text
  39         .align
  40         .global         decorr_stereo_pass_cont_arm
  41
  42 /*
  43  * on entry:
  44  *
  45  * r0 = struct decorr_pass *dpp
  46  * r1 = long *buffer
  47  * r2 = long sample_count
  48  */
  49
  50 decorr_stereo_pass_cont_arm:
  51
  52         stmfd   sp!, {r4 - r8, r10, r11, lr}  @ save work registers & return address
  53         mov     r5, r0                  @ r5 = dpp
  54         mov     r11, #512               @ r11 = 512 for rounding
  55         ldrsh   r6, [r0, #2]            @ r6 = dpp->delta
  56         ldrsh   r4, [r0, #4]            @ r4 = dpp->weight_A
  57         ldrsh   r0, [r0, #6]            @ r0 = dpp->weight_B
  58         cmp     r2, #0                  @ exit if no samples to process
  59         beq     common_exit             @  (weights are still written back)
  60
  61         add     r7, r1, r2, asl #3      @ r7 = buffer ending position
  62         ldrsh   r2, [r5, #0]            @ r2 = dpp->term
  63         cmp     r2, #0                  @ negative terms are dispatched
  64         bmi     minus_term              @  separately below
  65
  66         ldr     lr, [r1, #-16]          @ load 2 sample history from buffer
  67         ldr     r10, [r1, #-12]         @  for terms 2, 17, and 18
  68         ldr     r8, [r1, #-8]           @ (lr/r10 = 2nd previous left/right,
  69         ldr     r3, [r1, #-4]           @  r8/r3 = previous left/right)
  70         cmp     r2, #17
  71         beq     term_17_loop
  72         cmp     r2, #18
  73         beq     term_18_loop
  74         cmp     r2, #2
  75         beq     term_2_loop
  76         b       term_default_loop       @ else handle default (1-8, except 2)
  77
  78 minus_term:
  79         mov     r10, #1024              @ r10 = -1024 for weight clipping
  80         rsb     r10, r10, #0            @  (only used for negative terms)
  81         cmn     r2, #1                  @ term == -1 ?
  82         beq     term_minus_1
  83         cmn     r2, #2                  @ term == -2 ?
  84         beq     term_minus_2
  85         cmn     r2, #3                  @ term == -3 ?
  86         beq     term_minus_3
  87         b       common_exit             @ any other negative term: nothing to process
  88
  89 /*
  90  ******************************************************************************
  91  * Loop to handle term = 17 condition
  92  *
  93  * r0 = dpp->weight_B           r8 = previous left sample
  94  * r1 = bptr                    r9 =
  95  * r2 = current sample          r10 = second previous right sample
  96  * r3 = previous right sample   r11 = 512 (for rounding)
  97  * r4 = dpp->weight_A           ip = current decorrelation value
  98  * r5 = dpp                     sp =
  99  * r6 = dpp->delta              lr = second previous left sample
 100  * r7 = eptr                    pc =
 101  *******************************************************************************
 102  */
 103
 104 term_17_loop:
 105         rsbs    ip, lr, r8, asl #1      @ decorr value = (2 * prev) - 2nd prev
 106         mov     lr, r8                  @ previous becomes 2nd previous
 107         ldr     r2, [r1], #4            @ get sample & update pointer
 108         mla     r8, ip, r4, r11         @ mult decorr value by weight, round,
 109         add     r8, r2, r8, asr #10     @  shift, and add to new sample
 110         strne   r8, [r1, #-4]           @ if change possible, store sample back
 111         cmpne   r2, #0                  @ (flags are still those set by rsbs above)
 112         beq     .L325                   @ no weight update if either value is zero
 113         teq     ip, r2                  @ update weight based on signs
 114         submi   r4, r4, r6              @ signs differ: weight_A -= delta
 115         addpl   r4, r4, r6              @ signs match:  weight_A += delta
 116
 117 .L325:  rsbs    ip, r10, r3, asl #1     @ do same thing for right channel
 118         mov     r10, r3                 @ previous becomes 2nd previous
 119         ldr     r2, [r1], #4            @ get sample & update pointer
 120         mla     r3, ip, r0, r11         @ decorr value * weight_B + 512
 121         add     r3, r2, r3, asr #10     @ new sample = old sample + (product >> 10)
 122         strne   r3, [r1, #-4]           @ store back only if decorr value != 0
 123         cmpne   r2, #0
 124         beq     .L329                   @ no weight update if either value is zero
 125         teq     ip, r2
 126         submi   r0, r0, r6              @ signs differ: weight_B -= delta
 127         addpl   r0, r0, r6              @ signs match:  weight_B += delta
 128
 129 .L329:  cmp     r7, r1                  @ loop back if more samples to do
 130         bhi     term_17_loop
 131         b       store_1718              @ common exit for terms 17 & 18
 132
 133 /*
 134  ******************************************************************************
 135  * Loop to handle term = 18 condition
 136  *
 137  * r0 = dpp->weight_B           r8 = previous left sample
 138  * r1 = bptr                    r9 =
 139  * r2 = current sample          r10 = second previous right sample
 140  * r3 = previous right sample   r11 = 512 (for rounding)
 141  * r4 = dpp->weight_A           ip = decorrelation value
 142  * r5 = dpp                     sp =
 143  * r6 = dpp->delta              lr = second previous left sample
 144  * r7 = eptr                    pc =
 145  *******************************************************************************
 146  */
 147
 148 term_18_loop:
 149         sub     ip, r8, lr              @ decorr value =
 150         mov     lr, r8                  @  ((3 * prev) - 2nd prev) >> 1
 151         adds    ip, r8, ip, asr #1      @  computed as prev + ((prev - 2nd prev) >> 1)
 152         ldr     r2, [r1], #4            @ get sample & update pointer
 153         mla     r8, ip, r4, r11         @ mult decorr value by weight, round,
 154         add     r8, r2, r8, asr #10     @  shift, and add to new sample
 155         strne   r8, [r1, #-4]           @ if change possible, store sample back
 156         cmpne   r2, #0                  @ (flags are still those set by adds above)
 157         beq     .L337                   @ no weight update if either value is zero
 158         teq     ip, r2                  @ update weight based on signs
 159         submi   r4, r4, r6              @ signs differ: weight_A -= delta
 160         addpl   r4, r4, r6              @ signs match:  weight_A += delta
 161
 162 .L337:  sub     ip, r3, r10             @ do same thing for right channel
 163         mov     r10, r3                 @ previous becomes 2nd previous
 164         adds    ip, r3, ip, asr #1      @ decorr value = prev + ((prev - 2nd prev) >> 1)
 165         ldr     r2, [r1], #4            @ get sample & update pointer
 166         mla     r3, ip, r0, r11         @ decorr value * weight_B + 512
 167         add     r3, r2, r3, asr #10     @ new sample = old sample + (product >> 10)
 168         strne   r3, [r1, #-4]           @ store back only if decorr value != 0
 169         cmpne   r2, #0
 170         beq     .L341                   @ no weight update if either value is zero
 171         teq     ip, r2
 172         submi   r0, r0, r6              @ signs differ: weight_B -= delta
 173         addpl   r0, r0, r6              @ signs match:  weight_B += delta
 174
 175 .L341:  cmp     r7, r1                  @ loop back if more samples to do
 176         bhi     term_18_loop            @ else fall through to store_1718
 177
 178 /* common exit for terms 17 & 18 */
 179
 180 store_1718:
 181         str     r3, [r5, #40]           @ store sample history into struct
 182         str     r8, [r5, #8]            @ (r3/r8 = previous right/left sample,
 183         str     r10, [r5, #44]          @  r10/lr = 2nd previous right/left sample)
 184         str     lr, [r5, #12]
 185         b       common_exit             @ and return
 186
 187 /*
 188  ******************************************************************************
 189  * Loop to handle term = 2 condition
 190  * (note that this case can be handled by the default term handler (1-8), but
 191  * this special case is faster because it doesn't have to read memory twice)
 192  *
 193  * r0 = dpp->weight_B           r8 = previous left sample
 194  * r1 = bptr                    r9 =
 195  * r2 = current sample          r10 = second previous right sample
 196  * r3 = previous right sample   r11 = 512 (for rounding)
 197  * r4 = dpp->weight_A           ip = decorrelation value
 198  * r5 = dpp                     sp =
 199  * r6 = dpp->delta              lr = second previous left sample
 200  * r7 = eptr                    pc =
 201  *******************************************************************************
 202  */
 203
 204 term_2_loop:
 205         movs    ip, lr                  @ get decorrelation value & test
 206         mov     lr, r8                  @ previous becomes 2nd previous
 207         ldr     r2, [r1], #4            @ get sample & update pointer
 208         mla     r8, ip, r4, r11         @ mult decorr value by weight, round,
 209         add     r8, r2, r8, asr #10     @  shift, and add to new sample
 210         strne   r8, [r1, #-4]           @ if change possible, store sample back
 211         cmpne   r2, #0                  @ (flags are still those set by movs above)
 212         beq     .L225                   @ no weight update if either value is zero
 213         teq     ip, r2                  @ update weight based on signs
 214         submi   r4, r4, r6              @ signs differ: weight_A -= delta
 215         addpl   r4, r4, r6              @ signs match:  weight_A += delta
 216
 217 .L225:  movs    ip, r10                 @ do same thing for right channel
 218         mov     r10, r3                 @ previous becomes 2nd previous
 219         ldr     r2, [r1], #4            @ get sample & update pointer
 220         mla     r3, ip, r0, r11         @ decorr value * weight_B + 512
 221         add     r3, r2, r3, asr #10     @ new sample = old sample + (product >> 10)
 222         strne   r3, [r1, #-4]           @ store back only if decorr value != 0
 223         cmpne   r2, #0
 224         beq     .L229                   @ no weight update if either value is zero
 225         teq     ip, r2
 226         submi   r0, r0, r6              @ signs differ: weight_B -= delta
 227         addpl   r0, r0, r6              @ signs match:  weight_B += delta
 228
 229 .L229:  cmp     r7, r1                  @ loop back if more samples to do
 230         bhi     term_2_loop
 231         b       default_term_exit       @ this exit updates all dpp->samples
 232
 233 /*
 234  ******************************************************************************
 235  * Loop to handle default term condition
 236  *
 237  * r0 = dpp->weight_B           r8 = result accumulator
 238  * r1 = bptr                    r9 =
 239  * r2 = dpp->term               r10 =
 240  * r3 = decorrelation value     r11 = 512 (for rounding)
 241  * r4 = dpp->weight_A           ip = current sample
 242  * r5 = dpp                     sp =
 243  * r6 = dpp->delta              lr =
 244  * r7 = eptr                    pc =
 245  *******************************************************************************
 246  */
 247
 248 term_default_loop:
 249         ldr     ip, [r1]                @ get original sample
 250         ldr     r3, [r1, -r2, asl #3]   @ get decorrelation value based on term
 251         mla     r8, r3, r4, r11         @ mult decorr value by weight, round,
 252         add     r8, ip, r8, asr #10     @  shift and add to new sample
 253         str     r8, [r1], #4            @ store updated sample & advance pointer
 254         cmp     r3, #0                  @ skip the weight update if the decorr
 255         cmpne   ip, #0                  @  value or the original sample is zero
 256         beq     .L350
 257         teq     ip, r3                  @ update weight based on signs
 258         submi   r4, r4, r6              @ signs differ: weight_A -= delta
 259         addpl   r4, r4, r6              @ signs match:  weight_A += delta
 260
 261 .L350:  ldr     ip, [r1]                @ do the same thing for right channel
 262         ldr     r3, [r1, -r2, asl #3]   @ decorr value = sample "term" pairs back
 263         mla     r8, r3, r0, r11         @ decorr value * weight_B + 512
 264         add     r8, ip, r8, asr #10     @ new sample = old sample + (product >> 10)
 265         str     r8, [r1], #4            @ store updated sample & advance pointer
 266         cmp     r3, #0
 267         cmpne   ip, #0
 268         beq     .L354                   @ no weight update if either value is zero
 269         teq     ip, r3
 270         submi   r0, r0, r6              @ signs differ: weight_B -= delta
 271         addpl   r0, r0, r6              @ signs match:  weight_B += delta
 272
 273 .L354:  cmp     r7, r1                  @ loop back if more samples to do
 274         bhi     term_default_loop       @ else fall through to default_term_exit
 275
 276 /*
 277  * This exit is used by terms 1-8 to store the previous 8 samples into the decorr
 278  * structure (even if they are not all used for the given term)
 279  */
 280
 281 default_term_exit:
 282         ldrsh   r3, [r5, #0]            @ r3 = dpp->term
 283         sub     ip, r3, #1              @ ip = slot index for the newest pair (term - 1)
 284         mov     lr, #7                  @ lr = loop counter, runs 7 down to 0 (8 pairs)
 285
 286 .L358:  and     r3, ip, #7              @ wrap slot index into the range 0-7
 287         add     r3, r5, r3, asl #2      @ r3 = dpp + 4 * slot
 288         ldr     r2, [r1, #-4]           @ second (right) sample of the pair before bptr
 289         str     r2, [r3, #40]           @  goes into the history array at offset 40
 290         ldr     r2, [r1, #-8]!          @ first (left) sample; bptr steps back one pair
 291         str     r2, [r3, #8]            @  goes into the history array at offset 8
 292         sub     ip, ip, #1              @ the next older pair uses the previous slot
 293         sub     lr, lr, #1
 294         cmn     lr, #1                  @ finished once the counter reaches -1
 295         bne     .L358
 296         b       common_exit
 297
 298 /*
 299  ******************************************************************************
 300  * Loop to handle term = -1 condition
 301  *
 302  * r0 = dpp->weight_B           r8 =
 303  * r1 = bptr                    r9 =
 304  * r2 = intermediate result     r10 = -1024 (for clipping)
 305  * r3 = previous right sample   r11 = 512 (for rounding)
 306  * r4 = dpp->weight_A           ip = current sample
 307  * r5 = dpp                     sp =
 308  * r6 = dpp->delta              lr = updated left sample
 309  * r7 = eptr                    pc =
 310  *******************************************************************************
 311  */
 312
 313 term_minus_1:
 314         ldr     r3, [r1, #-4]           @ r3 = previous right sample
 315
 316 term_minus_1_loop:
 317         ldr     ip, [r1]                @ for left channel the decorrelation value
 318         mla     r2, r3, r4, r11         @  is the previous right sample (in r3)
 319         add     lr, ip, r2, asr #10     @ lr = updated left sample
 320         str     lr, [r1], #8            @ store it & advance pointer a whole pair
 321         cmp     r3, #0                  @ skip the weight update if the decorr
 322         cmpne   ip, #0                  @  value or the original sample is zero
 323         beq     .L361
 324         teq     ip, r3                  @ update weight based on signs
 325         submi   r4, r4, r6              @ signs differ: weight_A -= delta
 326         addpl   r4, r4, r6              @ signs match:  weight_A += delta
 327         cmp     r4, #1024               @ then clip weight to +/-1024
 328         movgt   r4, #1024
 329         cmp     r4, r10
 330         movlt   r4, r10
 331
 332 .L361:  ldr     r2, [r1, #-4]           @ for right channel the decorrelation value
 333         mla     r3, lr, r0, r11         @  is the just updated left sample (in lr)
 334         add     r3, r2, r3, asr #10     @ r3 = updated right sample
 335         str     r3, [r1, #-4]           @ store it back (pointer already advanced)
 336         cmp     lr, #0                  @ skip the weight update if the decorr
 337         cmpne   r2, #0                  @  value or the original sample is zero
 338         beq     .L369
 339         teq     r2, lr                  @ update weight based on signs
 340         submi   r0, r0, r6              @ signs differ: weight_B -= delta
 341         addpl   r0, r0, r6              @ signs match:  weight_B += delta
 342         cmp     r0, #1024               @ then clip weight to +/-1024
 343         movgt   r0, #1024
 344         cmp     r0, r10
 345         movlt   r0, r10
 346
 347 .L369:  cmp     r7, r1                  @ loop back if more samples to do
 348         bhi     term_minus_1_loop
 349
 350         str     r3, [r5, #8]            @ else store right sample and exit
 351         b       common_exit
 352
 353 /*
 354  ******************************************************************************
 355  * Loop to handle term = -2 condition
 356  * (note that the channels are processed in the reverse order here)
 357  *
 358  * r0 = dpp->weight_B           r8 =
 359  * r1 = bptr                    r9 =
 360  * r2 = intermediate result     r10 = -1024 (for clipping)
 361  * r3 = previous left sample    r11 = 512 (for rounding)
 362  * r4 = dpp->weight_A           ip = current sample
 363  * r5 = dpp                     sp =
 364  * r6 = dpp->delta              lr = updated right sample
 365  * r7 = eptr                    pc =
 366  *******************************************************************************
 367  */
 368
 369 term_minus_2:
 370         ldr     r3, [r1, #-8]           @ r3 = previous left sample
 371
 372 term_minus_2_loop:
 373         ldr     ip, [r1, #4]            @ for right channel the decorrelation value
 374         mla     r2, r3, r0, r11         @  is the previous left sample (in r3)
 375         add     lr, ip, r2, asr #10     @ lr = updated right sample
 376         str     lr, [r1, #4]            @ store it back (pointer not moved yet)
 377         cmp     r3, #0                  @ skip the weight update if the decorr
 378         cmpne   ip, #0                  @  value or the original sample is zero
 379         beq     .L380
 380         teq     ip, r3                  @ update weight based on signs
 381         submi   r0, r0, r6              @ signs differ: weight_B -= delta
 382         addpl   r0, r0, r6              @ signs match:  weight_B += delta
 383         cmp     r0, #1024               @ then clip weight to +/-1024
 384         movgt   r0, #1024
 385         cmp     r0, r10
 386         movlt   r0, r10
 387
 388 .L380:  ldr     r2, [r1, #0]            @ for left channel the decorrelation value
 389         mla     r3, lr, r4, r11         @  is the just updated right sample (in lr)
 390         add     r3, r2, r3, asr #10     @ r3 = updated left sample
 391         str     r3, [r1], #8            @ store it & advance pointer a whole pair
 392         cmp     lr, #0                  @ skip the weight update if the decorr
 393         cmpne   r2, #0                  @  value or the original sample is zero
 394         beq     .L388
 395         teq     r2, lr                  @ update weight based on signs
 396         submi   r4, r4, r6              @ signs differ: weight_A -= delta
 397         addpl   r4, r4, r6              @ signs match:  weight_A += delta
 398         cmp     r4, #1024               @ then clip weight to +/-1024
 399         movgt   r4, #1024
 400         cmp     r4, r10
 401         movlt   r4, r10
 402
 403 .L388:  cmp     r7, r1                  @ loop back if more samples to do
 404         bhi     term_minus_2_loop
 405
 406         str     r3, [r5, #40]           @ else store left channel and exit
 407         b       common_exit
 408
 409 /*
 410  ******************************************************************************
 411  * Loop to handle term = -3 condition
 412  *
 413  * r0 = dpp->weight_B           r8 = previous left sample
 414  * r1 = bptr                    r9 =
 415  * r2 = updated left, cur right r10 = -1024 (for clipping)
 416  * r3 = previous right sample   r11 = 512 (for rounding)
 417  * r4 = dpp->weight_A           ip = current left, then previous left sample
 418  * r5 = dpp                     sp =
 419  * r6 = dpp->delta              lr =
 420  * r7 = eptr                    pc =
 421  *******************************************************************************
 422  */
 423
 424 term_minus_3:
 425         ldr     r3, [r1, #-4]           @ load previous samples
 426         ldr     r8, [r1, #-8]           @  (r3 = right, r8 = left)
 427
 428 term_minus_3_loop:
 429         ldr     ip, [r1]                @ ip = current left sample
 430         mla     r2, r3, r4, r11         @ decorr value is the previous right (in r3)
 431         add     r2, ip, r2, asr #10     @ r2 = updated left sample
 432         str     r2, [r1], #4            @ store it & advance pointer
 433         cmp     r3, #0                  @ skip the weight update if the decorr
 434         cmpne   ip, #0                  @  value or the original sample is zero
 435         beq     .L399
 436         teq     ip, r3                  @ update weight based on signs
 437         submi   r4, r4, r6              @ signs differ: weight_A -= delta
 438         addpl   r4, r4, r6              @ signs match:  weight_A += delta
 439         cmp     r4, #1024               @ then clip weight to +/-1024
 440         movgt   r4, #1024
 441         cmp     r4, r10
 442         movlt   r4, r10
 443
 444 .L399:  movs    ip, r8                  @ ip = previous left we use now
 445         mov     r8, r2                  @ r8 = current left we use next time
 446         ldr     r2, [r1], #4            @ r2 = current right sample; advance pointer
 447         mla     r3, ip, r0, r11         @ decorr value * weight_B + 512
 448         add     r3, r2, r3, asr #10     @ r3 = updated right sample
 449         strne   r3, [r1, #-4]           @ store back only if decorr value != 0
 450         cmpne   r2, #0                  @ (flags are still those set by movs above)
 451         beq     .L407                   @ no weight update if either value is zero
 452         teq     ip, r2                  @ update weight based on signs
 453         submi   r0, r0, r6              @ signs differ: weight_B -= delta
 454         addpl   r0, r0, r6              @ signs match:  weight_B += delta
 455         cmp     r0, #1024               @ then clip weight to +/-1024
 456         movgt   r0, #1024
 457         cmp     r0, r10
 458         movlt   r0, r10
 459
 460 .L407:  cmp     r7, r1                  @ loop back if more samples to do
 461         bhi     term_minus_3_loop
 462
 463         str     r3, [r5, #8]            @ else store previous samples & exit
 464         str     r8, [r5, #40]           @  (execution falls through to common_exit)
 465
 466 /*
 467  * Before finally exiting we must store weights back for next time
 468  */
 469
 470 common_exit:
 471         strh    r4, [r5, #4]            @ dpp->weight_A = r4
 472         strh    r0, [r5, #6]            @ dpp->weight_B = r0
 473         ldmfd   sp!, {r4 - r8, r10, r11, pc}    @ restore saved registers & return
 474