apps/codecs/libwavpack/coldfire.S

   1 /***************************************************************************
   2  *             __________               __   ___.
   3  *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
   4  *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
   5  *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
   6  *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
   7  *                     \/            \/     \/    \/            \/
   8  * $Id$
   9  *
  10  * Copyright (C) 2005 by David Bryant
  11  *
  12  * This program is free software; you can redistribute it and/or
  13  * modify it under the terms of the GNU General Public License
  14  * as published by the Free Software Foundation; either version 2
  15  * of the License, or (at your option) any later version.
  16  *
  17  * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
  18  * KIND, either express or implied.
  19  *
  20  ****************************************************************************/
  21
  22 /* This is an assembly optimized version of the following WavPack function:
  23  *
  24  * void decorr_stereo_pass_cont_mcf5249 (struct decorr_pass *dpp,
  25  *                                       long *buffer, long sample_count);
  26  *
  27  * It performs a single pass of stereo decorrelation on the provided buffer.
  28  * Note that this version of the function requires that the 8 previous stereo
  29  * samples are visible and correct. In other words, it ignores the "samples_*"
  30  * fields in the decorr_pass structure and gets the history data directly
  31  * from the buffer. It does, however, return the appropriate history samples
  32  * to the decorr_pass structure before returning.
  33  *
  34  * This is written to work on a MCF5249 processor, or any processor based on
  35  * the ColdFire V2 core with an EMAC unit. The EMAC is perfectly suited for
  36  * the "apply_weight" function of WavPack decorrelation because it provides
  37  * the required 40-bit product. The fractional rounding mode of the EMAC is not
  38  * configurable and uses "round to even" while WavPack uses "round to larger",
  39  * so the rounding has to be done manually.
  40  */
  41
  42         .text
  43         .align  2
  44         .global decorr_stereo_pass_cont_mcf5249
  45
  46 decorr_stereo_pass_cont_mcf5249:
  47
  48         lea     (-44, %sp), %sp         | reserve 44 bytes (11 registers * 4)
  49         movem.l %d2-%d7/%a2-%a6, (%sp)  | save d2-d7 and a2-a6
  50         move.l  44+4(%sp), %a2          | a2 = dpp-> (1st stack argument)
  51         move.l  44+8(%sp), %a1          | a1 = bptr (2nd stack argument)
  52         move.w  2(%a2), %a3             | a3 = dpp->delta (word at offset 2)
  53         move.w  4(%a2), %d3             | d3 = dpp->weight_A (sign extended)
  54         ext.l   %d3
  55         move.w  6(%a2), %d4             | d4 = dpp->weight_B (sign extended)
  56         ext.l   %d4
  57         move.l 44+12(%sp), %d0          | d0 = sample_count (3rd stack argument)
  58         jbeq    return_only             | if zero, nothing to do
  59
  60         lsl.l   #3, %d0                 | d0 = sample_count * 8 (bytes to process)
  61         move.l  %d0, %d5
  62         add.l   %a1, %d5                | d5 = eptr = bptr + (sample_count * 8)
  63
  64         moveq.l #17, %d0                | left shift weights & delta 17 places
  65         asl.l   %d0, %d3                | d3 = weight_A << 17
  66         asl.l   %d0, %d4                | d4 = weight_B << 17
  67         move.l  %a3, %d1                | delta goes through d1 for the shift
  68         asl.l   %d0, %d1
  69         move.l  %d1, %a3                | a3 = delta << 17
  70
  71         moveq.l #0x20, %d6              | d6 is only a temporary here
  72         move.l  %d6, %macsr             | set fractional mode for MAC
  73         move.l  #0x800000, %accext01    | acc1 = 0x00 0000 80 (for rounding)
  74
  75         move.l  #1024<<17, %d6          | d6 & d7 are weight clipping limits
  76         move.l  #-1024<<17, %d7         | (only used by negative terms)
  77
  78         move.w  (%a2), %d0              | d0 = term (word at offset 0 of dpp)
  79         ext.l   %d0
  80         cmp.l   #17, %d0
  81         jbeq    term_17                 | term = 17
  82         cmp.l   #18, %d0
  83         jbeq    term_18                 | term = 18
  84         addq.l  #1, %d0                 | d0 = term + 1, zero when term is -1
  85         jbeq    term_minus_1            | term = -1
  86         addq.l  #1, %d0                 | d0 = term + 2, zero when term is -2
  87         jbeq    term_minus_2            | term = -2
  88         addq.l  #1, %d0                 | d0 = term + 3, zero when term is -3
  89         jbeq    term_minus_3            | term = -3
  90         jbra    term_default            | default term = 1 - 8
  91
  92 |------------------------------------------------------------------------------
  93 | Loop to handle term = 17 condition
  94 |
  95 | a0 =                          d0 = (2 * bptr [-1]) - bptr [-2]
  96 | a1 = bptr                     d1 = initial bptr [0]
  97 | a2 = dpp->                    d2 = updated bptr [0]
  98 | a3 = dpp->delta << 17         d3 = dpp->weight_A << 17
  99 | a4 =                          d4 = dpp->weight_B << 17
 100 | a5 =                          d5 = eptr
 101 | macsr = 0x20                  acc1 = 0x00 0000 80
 102 |------------------------------------------------------------------------------
 103
 104 term_17:
 105         move.l  -8(%a1), %d0            | d0 = 2 * bptr [-1] - bptr [-2]
 106         add.l   %d0, %d0                | (history read 8 and 16 bytes back)
 107         sub.l   -16(%a1), %d0
 108         beq     .L251                   | if zero, skip calculation
 109         move.l  %acc1, %acc0            | preload acc0 with the rounding value
 110         asl.l   #4, %d0                 | acc0 = acc1 + (d0 << 4) * weight_A
 111         mac.l   %d0, %d3, %acc0
 112         move.l  (%a1), %d1              | d1 = initial bptr [0]
 113         beq     .L255                   | if zero, leave weight unchanged
 114         eor.l   %d1, %d0                | else compare signs
 115         bge     .L256                   | if same, add delta to weight
 116         sub.l   %a3, %d3                | else subtract delta from weight
 117         sub.l   %a3, %d3                | subtract twice, add below nets -delta
 118 .L256:  add.l   %a3, %d3                | add delta to weight
 119
 120 .L255:  move.l  %acc0, %d2              | d2 = rounded product
 121         add.l   %d1, %d2                | update bptr [0] and store
 122         move.l  %d2, (%a1)+
 123
 124 .L253:  move.l  -8(%a1), %d0            | d0 = 2 * bptr [-1] - bptr [-2]
 125         add.l   %d0, %d0                | (same computation, second channel)
 126         sub.l   -16(%a1), %d0
 127         beq     .L257                   | if zero, skip calculations
 128         move.l  %acc1, %acc0            | preload acc0 with the rounding value
 129         asl.l   #4, %d0                 | acc0 = acc1 + (d0 << 4) * weight_B
 130         mac.l   %d0, %d4, %acc0
 131         move.l  (%a1), %d1              | d1 = initial bptr [0]
 132         beq     .L254                   | if zero, leave weight unchanged
 133         eor.l   %d1, %d0                | else compare signs
 134         bge     .L259                   | if same, add delta to weight
 135         sub.l   %a3, %d4                | else subtract delta from weight
 136         sub.l   %a3, %d4                | subtract twice, add below nets -delta
 137 .L259:  add.l   %a3, %d4                | add delta to weight
 138
 139 .L254:  move.l  %acc0, %d2              | d2 = rounded product
 140         add.l   %d1, %d2                | update bptr [0] and store
 141         move.l  %d2, (%a1)+
 142
 143 .L252:  cmp.l   %a1, %d5                | loop if bptr < eptr
 144         jbhi    term_17
 145         bra     term_17_18_finish       | exit through common path
 146
 147 .L251:  addq.l  #4, %a1                 | bump pointer and jump back into loop
 148         bra     .L253
 149
 150 .L257:  addq.l  #4, %a1                 | bump pointer and jump back into loop
 151         bra     .L252
 152
 153 |------------------------------------------------------------------------------
 154 | Loop to handle term = 18 condition
 155 |
 156 | a0 =                          d0 = ((3 * bptr [-1]) - bptr [-2]) >> 1
 157 | a1 = bptr                     d1 = initial bptr [0]
 158 | a2 = dpp->                    d2 = updated bptr [0]
 159 | a3 = dpp->delta << 17         d3 = dpp->weight_A << 17
 160 | a4 =                          d4 = dpp->weight_B << 17
 161 | a5 =                          d5 = eptr
 162 | macsr = 0x20                  acc1 = 0x00 0000 80
 163 |------------------------------------------------------------------------------
 164
 165 term_18:
 166         move.l  -8(%a1), %a0            | d0 = (3 * bptr [-1] - bptr [-2]) >> 1
 167         lea     (%a0,%a0.l*2), %a0      | a0 = a0 + (a0 * 2), i.e. times three
 168         move.l  %a0, %d0
 169         sub.l   -16(%a1), %d0
 170         asr.l   #1, %d0
 171         beq     .L260                   | if zero, skip calculation
 172         move.l  %acc1, %acc0            | preload acc0 with the rounding value
 173         asl.l   #4, %d0                 | acc0 = acc1 + (d0 << 4) * weight_A
 174         mac.l   %d0, %d3, %acc0
 175         move.l  (%a1), %d1              | d1 = initial bptr [0]
 176         beq     .L266                   | if zero, leave weight unchanged
 177         eor.l   %d1, %d0                | else compare signs
 178         bge     .L267                   | if same, add delta to weight
 179         sub.l   %a3, %d3                | else subtract delta from weight
 180         sub.l   %a3, %d3                | subtract twice, add below nets -delta
 181 .L267:  add.l   %a3, %d3                | add delta to weight
 182
 183 .L266:  move.l  %acc0, %d2              | d2 = rounded product
 184         add.l   %d1, %d2                | add applied weight to bptr [0], store
 185         move.l  %d2, (%a1)+
 186
 187 .L268:  move.l  -8(%a1), %a0            | d0 = (3 * bptr [-1] - bptr [-2]) >> 1
 188         lea     (%a0,%a0.l*2), %a0      | a0 = a0 + (a0 * 2), i.e. times three
 189         move.l  %a0, %d0
 190         sub.l   -16(%a1), %d0
 191         asr.l   #1, %d0
 192         beq     .L261                   | if zero, skip calculation
 193         move.l  %acc1, %acc0            | preload acc0 with the rounding value
 194         asl.l   #4, %d0                 | acc0 = acc1 + (d0 << 4) * weight_B
 195         mac.l   %d0, %d4, %acc0
 196         move.l  (%a1), %d1              | d1 = initial bptr [0]
 197         beq     .L265                   | if zero, leave weight unchanged
 198         eor.l   %d1, %d0                | else compare signs
 199         bge     .L270                   | if same, add delta to weight
 200         sub.l   %a3, %d4                | else subtract delta from weight
 201         sub.l   %a3, %d4                | subtract twice, add below nets -delta
 202 .L270:  add.l   %a3, %d4                | add delta to weight
 203
 204 .L265:  move.l  %acc0, %d2              | d2 = rounded product
 205         add.l   %d1, %d2                | add applied weight to bptr [0], store
 206         move.l  %d2, (%a1)+
 207
 208 .L269:  cmp.l   %a1, %d5                | loop if bptr < eptr
 209         jbhi    term_18
 210         bra     term_17_18_finish       | exit through common path
 211
 212 .L260:  addq.l  #4, %a1                 | bump pointer and jump back into loop
 213         bra     .L268
 214
 215 .L261:  addq.l  #4, %a1                 | bump pointer and jump back into loop
 216         bra     .L269
 217
 218 term_17_18_finish:
 219         move.l  -4(%a1), 40(%a2)        | store dpp->samples_B [0] = bptr [-1]
 220         move.l  -8(%a1), 8(%a2)         | store dpp->samples_A [0] = bptr [-2]
 221         move.l  -12(%a1), 44(%a2)       | store dpp->samples_B [1] = bptr [-3]
 222         move.l  -16(%a1), 12(%a2)       | store dpp->samples_A [1] = bptr [-4]
 223         jbra    finish_up               | then write back weights and return
 224
 225 |------------------------------------------------------------------------------
 226 | Loop to handle default terms (i.e. 1 - 8)
 227 |
 228 | a0 = tptr                     d0 = tptr [0]
 229 | a1 = bptr                     d1 = initial bptr [0]
 230 | a2 = dpp->                    d2 = updated bptr [0]
 231 | a3 = dpp->delta << 17         d3 = dpp->weight_A << 17
 232 | a4 =                          d4 = dpp->weight_B << 17
 233 | a5 =                          d5 = eptr
 234 | macsr = 0x20                  acc1 = 0x00 0000 80
 235 |------------------------------------------------------------------------------
 236
 237 term_default:
 238         move.w  (%a2), %d0              | a0 = a1 - (dpp->term * 8)
 239         ext.l   %d0
 240         lsl.l   #3, %d0
 241         move.l  %a1, %a0
 242         sub.l   %d0, %a0                | a0 = tptr, trails bptr by term pairs
 243
 244 term_default_loop:
 245         move.l  (%a0)+, %d0             | d0 = tptr [0], skip ahead if zero
 246         beq     .L271
 247         move.l  %acc1, %acc0            | preload acc0 with the rounding value
 248         asl.l   #4, %d0                 | acc0 = acc1 + (d0 << 4) * weight_A
 249         mac.l   %d0, %d3, %acc0
 250         move.l  (%a1), %d1              | d1 = initial bptr [0]
 251         beq     .L277                   | if zero, leave weight unchanged
 252         eor.l   %d1, %d0                | else compare signs
 253         bge     .L278                   | if same, add delta to weight
 254         sub.l   %a3, %d3                | else subtract delta from weight
 255         sub.l   %a3, %d3                | subtract twice, add below nets -delta
 256 .L278:  add.l   %a3, %d3                | add delta to weight
 257
 258 .L277:  move.l  %acc0, %d2              | d2 = rounded product
 259         add.l   %d1, %d2                | add applied weight to bptr [0], store
 260         move.l  %d2, (%a1)+
 261
 262 .L275:  move.l  (%a0)+, %d0             | d0 = tptr [0], skip ahead if zero
 263         beq     .L272
 264         move.l  %acc1, %acc0            | preload acc0 with the rounding value
 265         asl.l   #4, %d0                 | acc0 = acc1 + (d0 << 4) * weight_B
 266         mac.l   %d0, %d4, %acc0
 267         move.l  (%a1), %d1              | d1 = initial bptr [0]
 268         beq     .L276                   | if zero, leave weight unchanged
 269         eor.l   %d1, %d0                | else compare signs
 270         bge     .L281                   | if same, add delta to weight
 271         sub.l   %a3, %d4                | else subtract delta from weight
 272         sub.l   %a3, %d4                | subtract twice, add below nets -delta
 273 .L281:  add.l   %a3, %d4                | add delta to weight
 274
 275 .L276:  move.l  %acc0, %d2              | d2 = rounded product
 276         add.l   %d1, %d2                | add applied weight to bptr [0], store
 277         move.l  %d2, (%a1)+
 278
 279 .L274:  cmp.l   %a1, %d5                | loop back if bptr < eptr
 280         jbhi    term_default_loop
 281         move.w  (%a2), %d0              | d0 = term (decremented in loop below)
 282         moveq.l #8, %d1                 | d1 = loop counter
 283
 284 .L323:  subq.l  #1, %d0                 | back up & mask index
 285         and.l   #7, %d0                 | keep index in 0-7, wrapping around
 286         move.l  -(%a1), 40(%a2,%d0.l*4) | store dpp->samples_B [d0]
 287         move.l  -(%a1), 8(%a2,%d0.l*4)  | store dpp->samples_A [d0]
 288         subq.l  #1, %d1                 | loop on count
 289         jbne    .L323
 290         jbra    finish_up
 291
 292 .L271:  addq.l  #4, %a1                 | bump pointer and jump back into loop
 293         bra     .L275
 294
 295 .L272:  addq.l  #4, %a1                 | bump pointer and jump back into loop
 296         bra     .L274
 297
 298
 299 |------------------------------------------------------------------------------
 300 | Loop to handle term = -1 condition
 301 |
 302 | a0 =                          d0 = decorrelation sample
 303 | a1 = bptr                     d1 = initial bptr [0]
 304 | a2 = dpp->                    d2 = updated bptr [0]
 305 | a3 = dpp->delta << 17         d3 = dpp->weight_A << 17
 306 | a4 =                          d4 = dpp->weight_B << 17
 307 | a5 =                          d5 = eptr
 308 | a6 =                          d6 = 1024 << 17
 309 | a7 =                          d7 = -1024 << 17
 310 | macsr = 0x20                  acc1 = 0x00 0000 80
 311 |------------------------------------------------------------------------------
 312
 313 term_minus_1:
 314         move.l  -4(%a1), %d0            | d0 = bptr [-1]
 315         beq     .L402                   | if zero, skip the weight_A step
 316         move.l  %acc1, %acc0            | preload acc0 with the rounding value
 317         asl.l   #4, %d0                 | acc0 = acc1 + ((d0 << 4) * weight_A)
 318         mac.l   %d0, %d3, %acc0
 319         move.l  (%a1), %d1              | d1 = initial bptr [0]
 320         beq     .L405                   | if zero, leave weight unchanged
 321         eor.l   %d1, %d0                | else compare signs
 322         bge     .L404                   | if same, add delta to weight
 323         sub.l   %a3, %d3                | else subtract delta from weight
 324         cmp.l   %d7, %d3                | check for negative clip limit
 325         bge     .L405
 326         move.l  %d7, %d3                | clip weight_A at -1024 << 17
 327         bra     .L405
 328
 329 .L404:  add.l   %a3, %d3                | add delta to weight
 330         cmp.l   %d6, %d3                | check for positive clip limit
 331         ble     .L405
 332         move.l  %d6, %d3                | clip weight_A at 1024 << 17
 333
 334 .L405:  move.l  %acc0, %d0              | d0 = rounded product
 335         add.l   %d1, %d0                | add applied weight to bptr [0], store
 336         move.l  %d0, (%a1)+             | d0 stays live as the weight_B sample
 337         beq     .L401                   | if stored value is zero, skip bptr [1]
 338
 339 .L410:  move.l  %acc1, %acc0            | preload acc0 with the rounding value
 340         asl.l   #4, %d0                 | acc0 = acc1 + ((d0 << 4) * weight_B)
 341         mac.l   %d0, %d4, %acc0
 342         move.l  (%a1), %d1              | d1 = initial bptr [1]
 343         beq     .L403                   | if zero, leave weight unchanged
 344         eor.l   %d1, %d0                | else compare signs
 345         bge     .L407                   | if same, add delta to weight
 346         sub.l   %a3, %d4                | else subtract delta from weight
 347         cmp.l   %d7, %d4                | check for negative clip limit
 348         bge     .L403
 349         move.l  %d7, %d4                | clip weight_B at -1024 << 17
 350         bra     .L403
 351
 352 .L407:  add.l   %a3, %d4                | add delta to weight
 353         cmp.l   %d6, %d4                | check for positive clip limit
 354         ble     .L403
 355         move.l  %d6, %d4                | clip weight_B at 1024 << 17
 356
 357 .L403:  move.l  %acc0, %d2              | d2 = rounded product
 358         add.l   %d1, %d2                | add applied weight to bptr [1], store
 359         move.l  %d2, (%a1)+
 360
 361 .L411:  cmp.l   %a1, %d5                | loop back if bptr < eptr
 362         jbhi    term_minus_1
 363         move.l  -4(%a1), 8(%a2)         | dpp->samples_A [0] = bptr [-1]
 364         jbra    finish_up
 365
 366 .L402:  move.l  (%a1)+, %d0             | d0 = unmodified bptr [0], bump pointer
 367         bne     .L410                   | if nonzero, still do the weight_B step
 368
 369 .L401:  addq.l  #4, %a1                 | skip bptr [1] and jump back into loop
 370         bra     .L411
 371
 372
 373 |------------------------------------------------------------------------------
 374 | Loop to handle term = -2 condition
 375 |
 376 | a0 =                          d0 = decorrelation sample
 377 | a1 = bptr                     d1 = initial bptr [0]
 378 | a2 = dpp->                    d2 = updated bptr [0]
 379 | a3 = dpp->delta << 17         d3 = dpp->weight_A << 17
 380 | a4 =                          d4 = dpp->weight_B << 17
 381 | a5 =                          d5 = eptr
 382 | a6 =                          d6 = 1024 << 17
 383 | a7 =                          d7 = -1024 << 17
 384 | macsr = 0x20                  acc1 = 0x00 0000 80
 385 |------------------------------------------------------------------------------
 386
 387 term_minus_2:
 388         move.l  -8(%a1), %d0            | d0 = bptr [-2]
 389         beq     .L511                   | if zero, skip the weight_B step
 390         move.l  %acc1, %acc0            | preload acc0 with the rounding value
 391         asl.l   #4, %d0                 | acc0 = acc1 + ((d0 << 4) * weight_B)
 392         mac.l   %d0, %d4, %acc0
 393         move.l  4(%a1), %d1             | d1 = initial bptr [1]
 394         beq     .L505                   | if zero, leave weight unchanged
 395         eor.l   %d1, %d0                | else compare signs
 396         bge     .L504                   | if same, add delta to weight
 397         sub.l   %a3, %d4                | else subtract delta from weight
 398         cmp.l   %d7, %d4                | check for negative clip limit
 399         bge     .L505
 400         move.l  %d7, %d4                | clip weight_B at -1024 << 17
 401         bra     .L505
 402
 403 .L504:  add.l   %a3, %d4                | add delta to weight
 404         cmp.l   %d6, %d4                | check for positive clip limit
 405         ble     .L505
 406         move.l  %d6, %d4                | clip weight_B at 1024 << 17
 407
 408 .L505:  move.l  %acc0, %d0              | d0 = rounded product
 409         add.l   %d1, %d0                | add applied weight to bptr [1], store
 410         move.l  %d0, 4(%a1)             | d0 stays live as the weight_A sample
 411         beq     .L512                   | if stored value is zero, skip bptr [0]
 412
 413 .L510:  move.l  %acc1, %acc0            | preload acc0 with the rounding value
 414         asl.l   #4, %d0                 | acc0 = acc1 + ((d0 << 4) * weight_A)
 415         mac.l   %d0, %d3, %acc0
 416         move.l  (%a1), %d1              | d1 = initial bptr [0]
 417         beq     .L503                   | if zero, leave weight unchanged
 418         eor.l   %d1, %d0                | else compare signs
 419         bge     .L507                   | if same, add delta to weight
 420         sub.l   %a3, %d3                | else subtract delta from weight
 421         cmp.l   %d7, %d3                | check for negative clip limit
 422         bge     .L503
 423         move.l  %d7, %d3                | clip weight_A at -1024 << 17
 424         bra     .L503
 425
 426 .L507:  add.l   %a3, %d3                | add delta to weight
 427         cmp.l   %d6, %d3                | check for positive clip limit
 428         ble     .L503
 429         move.l  %d6, %d3                | clip weight_A at 1024 << 17
 430
 431 .L503:  move.l  %acc0, %d2              | d2 = rounded product
 432         add.l   %d1, %d2                | add applied weight to bptr [0], store
 433         move.l  %d2, (%a1)
 434
 435 .L512:  addq.l  #8, %a1                 | advance bptr past both samples
 436         cmp.l   %a1, %d5                | loop if bptr < eptr
 437         jbhi    term_minus_2
 438         move.l  -8(%a1), 40(%a2)        | dpp->samples_B [0] = bptr [-2]
 439         jbra    finish_up
 440
 441 .L511:  move.l  4(%a1), %d0             | d0 = unmodified bptr [1]
 442         beq     .L512                   | if also zero, nothing to do for pair
 443         bra     .L510                   | else still do the weight_A step
 444
 445
 446 |------------------------------------------------------------------------------
 447 | Loop to handle term = -3 condition
 448 |
 449 | a0 =                          d0 = decorrelation sample
 450 | a1 = bptr                     d1 = initial bptr [0]
 451 | a2 = dpp->                    d2 = updated bptr [0]
 452 | a3 = dpp->delta << 17         d3 = dpp->weight_A << 17
 453 | a4 =                          d4 = dpp->weight_B << 17
 454 | a5 =                          d5 = eptr
 455 | a6 =                          d6 = 1024 << 17
 456 | a7 =                          d7 = -1024 << 17
 457 | macsr = 0x20                  acc1 = 0x00 0000 80
 458 |------------------------------------------------------------------------------
 459
 460 term_minus_3:
 461         move.l  -4(%a1), %d0            | d0 = bptr [-1]
 462         beq     .L301                   | if zero, skip the weight_A step
 463         move.l  %acc1, %acc0            | preload acc0 with the rounding value
 464         asl.l   #4, %d0                 | acc0 = acc1 + ((d0 << 4) * weight_A)
 465         mac.l   %d0, %d3, %acc0
 466         move.l  (%a1), %d1              | d1 = initial bptr [0]
 467         beq     .L320                   | if zero, leave weight unchanged
 468         eor.l   %d1, %d0                | else compare signs
 469         bge     .L319                   | if same, add delta to weight
 470         sub.l   %a3, %d3                | else subtract delta from weight
 471         cmp.l   %d7, %d3                | check for negative clip limit
 472         bge     .L320
 473         move.l  %d7, %d3                | clip weight_A at -1024 << 17
 474         bra     .L320
 475
 476 .L319:  add.l   %a3, %d3                | add delta to weight
 477         cmp.l   %d6, %d3                | check for positive clip limit
 478         ble     .L320
 479         move.l  %d6, %d3                | clip weight_A at 1024 << 17
 480
 481 .L320:  move.l  %acc0, %d2              | d2 = rounded product
 482         add.l   %d1, %d2                | add applied weight to bptr [0], store
 483         move.l  %d2, (%a1)+
 484
 485 .L330:  move.l  -12(%a1), %d0           | d0 = bptr [-2] (a1 already bumped by 4)
 486         beq     .L302                   | if zero, skip the weight_B step
 487         move.l  %acc1, %acc0            | preload acc0 with the rounding value
 488         asl.l   #4, %d0                 | acc0 = acc1 + ((d0 << 4) * weight_B)
 489         mac.l   %d0, %d4, %acc0
 490         move.l  (%a1), %d1              | d1 = initial bptr [1]
 491         beq     .L318                   | if zero, leave weight unchanged
 492         eor.l   %d1, %d0                | else compare signs
 493         bge     .L322                   | if same, add delta to weight
 494         sub.l   %a3, %d4                | else subtract delta from weight
 495         cmp.l   %d7, %d4                | check for negative clip limit
 496         bge     .L318
 497         move.l  %d7, %d4                | clip weight_B at -1024 << 17
 498         bra     .L318
 499
 500 .L322:  add.l   %a3, %d4                | add delta to weight
 501         cmp.l   %d6, %d4                | check for positive clip limit
 502         ble     .L318
 503         move.l  %d6, %d4                | clip weight_B at 1024 << 17
 504
 505 .L318:  move.l  %acc0, %d2              | d2 = rounded product
 506         add.l   %d1, %d2                | add applied weight to bptr [1], store
 507         move.l  %d2, (%a1)+
 508
 509 .L331:  cmp.l   %a1, %d5                | loop back if bptr < eptr
 510         jbhi    term_minus_3
 511         move.l  -4(%a1), 8(%a2)         | dpp->samples_A [0] = bptr [-1]
 512         move.l  -8(%a1), 40(%a2)        | dpp->samples_B [0] = bptr [-2]
 513         jbra    finish_up
 514
 515 .L301:  addq.l  #4, %a1                 | bump pointer and jump back into loop
 516         bra     .L330
 517
 518 .L302:  addq.l  #4, %a1                 | bump pointer and jump back into loop
 519         bra     .L331
 520
 521 | finish and return
 522
 523 finish_up:
 524         moveq.l #17, %d0        | undo the 17 place left shift of the weights
 525         asr.l   %d0, %d3
 526         asr.l   %d0, %d4
 527         move.w  %d3, 4(%a2)     | dpp->weight_A = weight_A
 528         move.w  %d4, 6(%a2)     | dpp->weight_B = weight_B
 529
 530         clr.l   %d0             | clear up EMAC (macsr is left set to 0x20)
 531         move.l  %d0, %acc0
 532         move.l  %d0, %acc1
 533
 534 return_only:
 535         movem.l (%sp), %d2-%d7/%a2-%a6  | restore d2-d7 and a2-a6
 536         lea     (44,%sp), %sp           | release the 44 byte save area
 537         rts