3 libdemac - A Monkey's Audio decoder
7 Copyright (C) Dave Chapman 2007
9 Coldfire predictor copyright (C) 2007 Jens Arnold
11 This program is free software; you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation; either version 2 of the License, or
14 (at your option) any later version.
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License for more details.
21 You should have received a copy of the GNU General Public License
22 along with this program; if not, write to the Free Software
23 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
26 #include "demac_config.h"
28 /* NOTE: The following need to be kept in sync with parser.h */
/* The *ADAPTCOEFFS* values are byte offsets relative to p->buf: the code
   below addresses (NAME-4,%a5) as p->buf[NAME-1], i.e. one int32_t entry is
   4 bytes.
   NOTE(review): YDELAYA, YDELAYB, XDELAYA, XDELAYB and PREDICTOR_HISTORY_SIZE
   are used by the code below but are not defined in the visible part of this
   file; presumably they come from demac_config.h or from lines not shown
   here - confirm. */
34 #define YADAPTCOEFFSA 72
35 #define XADAPTCOEFFSA 56
36 #define YADAPTCOEFFSB 40
37 #define XADAPTCOEFFSB 20
39 /* Byte offsets of the struct predictor_t members (used as displacements
   from the struct pointer held in %a6): */
40 #define buf 0 /* int32_t* buf */
42 #define YlastA 4 /* int32_t YlastA; */
43 #define XlastA 8 /* int32_t XlastA; */
45 #define YfilterB 12 /* int32_t YfilterB; */
46 #define XfilterA 16 /* int32_t XfilterA; */
48 #define XfilterB 20 /* int32_t XfilterB; */
49 #define YfilterA 24 /* int32_t YfilterA; */
51 #define YcoeffsA 28 /* int32_t YcoeffsA[4]; 16 bytes */
52 #define XcoeffsA 44 /* int32_t XcoeffsA[4]; 16 bytes */
53 #define YcoeffsB 60 /* int32_t YcoeffsB[5]; 20 bytes */
54 #define XcoeffsB 80 /* int32_t XcoeffsB[5]; 20 bytes */
56 #define historybuffer 100 /* int32_t historybuffer[] */
| ---------------------------------------------------------------------------
| predictor_decode_stereo
|
| Runs the Y predictor (filters A and B) on the value at decoded0 and the
| X predictor (filters A and B) on the value at decoded1, using the MAC
| unit: %acc0 accumulates the filter A sum, %acc1 the filter B sum.
|
| Stack arguments (offsets at entry): 4 = p, 8 = decoded0, 12 = decoded1,
| 16 = count.
|
| Register roles once set up:
|   %a6   = p (struct predictor_t*)
|   %a5   = p->buf (advanced by 4 bytes via addq.l, written back to p->buf
|           before the registers are restored)
|   %a3   = decoded0 cursor, %a4 = decoded1 cursor (both post-incremented)
|   (%sp) = remaining count
| %d2-%d7/%a2-%a6 are saved at (4,%sp) and restored before the frame is
| released.
|
| NOTE(review): in this listing the targets of bmi.s 1f and
| beq.s .move_hist, the loop-back branch and the return instruction are not
| visible, and several comments describe arithmetic whose instructions are
| not visible either. Such spots are marked NOTE(review) below - confirm
| against the complete file.
| ---------------------------------------------------------------------------
63 .global predictor_decode_stereo
64 .type predictor_decode_stereo,@function
66 | void predictor_decode_stereo(struct predictor_t* p,
71 predictor_decode_stereo:
72 lea.l (-12*4,%sp), %sp | frame: 11 saved registers + 1 slot for count
73 movem.l %d2-%d7/%a2-%a6, (4,%sp) | save registers above the count slot
75 movem.l (12*4+8,%sp), %a3-%a5 | %a3 = decoded0, %a4 = decoded1, %a5 = count
77 move.l %a5, (%sp) | (%sp) = count
79 move.l #0, %macsr | signed integer mode
80 move.l (12*4+4,%sp), %a6 | %a6 = p
81 move.l (%a6), %a5 | %a5 = p->buf
85 | ***** PREDICTOR Y *****
87 | Predictor Y, Filter A
89 move.l (YlastA,%a6), %d3 | %d3 = p->YlastA
91 movem.l (YDELAYA-12,%a5), %d0-%d2 | %d0 = p->buf[YDELAYA-3]
92 | %d1 = p->buf[YDELAYA-2]
93 | %d2 = p->buf[YDELAYA-1]
95 move.l %d3, (YDELAYA,%a5) | p->buf[YDELAYA] = %d3
98 neg.l %d2 | %d2 = -%d2 (NOTE(review): old comment said %d3 - %d2; no sub.l visible)
100 move.l %d2, (YDELAYA-4,%a5) | p->buf[YDELAYA-1] = %d2
102 movem.l (YcoeffsA,%a6), %d4-%d7 | %d4 = p->YcoeffsA[0]
103 | %d5 = p->YcoeffsA[1]
104 | %d6 = p->YcoeffsA[2]
105 | %d7 = p->YcoeffsA[3]
107 mac.l %d3, %d4, %acc0 | %acc0 = p->buf[YDELAYA] * p->YcoeffsA[0]
108 mac.l %d2, %d5, %acc0 | %acc0 += p->buf[YDELAYA-1] * p->YcoeffsA[1]
109 mac.l %d1, %d6, %acc0 | %acc0 += p->buf[YDELAYA-2] * p->YcoeffsA[2]
110 mac.l %d0, %d7, %acc0 | %acc0 += p->buf[YDELAYA-3] * p->YcoeffsA[3]
| Build -1 (N clear) or +1 (N set) in %d2 from the current N flag.
| NOTE(review): no tst.l is visible here, so which value set the flags
| tested by spl.b cannot be told from this listing - confirm.
114 spl.b %d2 | pos: 0x??????ff, neg: 0x??????00
115 extb.l %d2 | pos: 0xffffffff, neg: 0x00000000
116 or.l #1, %d2 | pos: 0xffffffff, neg: 0x00000001
118 move.l %d2, (YADAPTCOEFFSA-4,%a5) | p->buf[YADAPTCOEFFSA-1] = %d2
126 move.l %d3, (YADAPTCOEFFSA,%a5) | p->buf[YADAPTCOEFFSA] = %d3
128 | Predictor Y, Filter B
130 movem.l (YfilterB,%a6), %d2-%d3 | %d2 = p->YfilterB, %d3 = p->XfilterA (next member)
132 move.l %d3, (YfilterB,%a6) | p->YfilterB = %d3
134 move.l %d2, %d1 | %d1 = %d2
135 lsl.l #5, %d2 | %d2 = %d2 * 32
136 sub.l %d1, %d2 | %d2 -= %d1 (== 31 * old_d2)
137 asr.l #5, %d2 | %d2 >>= 5
138 sub.l %d2, %d3 | %d3 -= %d2
140 movem.l (YDELAYB-16,%a5), %d4-%d7 | %d4 = p->buf[YDELAYB-4]
141 | %d5 = p->buf[YDELAYB-3]
142 | %d6 = p->buf[YDELAYB-2]
143 | %d7 = p->buf[YDELAYB-1]
145 neg.l %d7 | %d7 = -%d7 (NOTE(review): old comment said %d3 - %d7; no sub.l visible)
147 move.l %d7, (YDELAYB-4,%a5) | p->buf[YDELAYB-1] = %d7
149 movem.l (YcoeffsB,%a6), %d1-%d2/%a0-%a2 | %d1 = p->YcoeffsB[0]
150 | %d2 = p->YcoeffsB[1]
151 | %a0 = p->YcoeffsB[2]
152 | %a1 = p->YcoeffsB[3]
153 | %a2 = p->YcoeffsB[4]
155 mac.l %d3, %d1, %acc1 | %acc1 = p->buf[YDELAYB] * p->YcoeffsB[0]
156 mac.l %d7, %d2, %acc1 | %acc1 += p->buf[YDELAYB-1] * p->YcoeffsB[1]
157 mac.l %d6, %a0, %acc1 | %acc1 += p->buf[YDELAYB-2] * p->YcoeffsB[2]
158 mac.l %d5, %a1, %acc1 | %acc1 += p->buf[YDELAYB-3] * p->YcoeffsB[3]
159 mac.l %d4, %a2, %acc1 | %acc1 += p->buf[YDELAYB-4] * p->YcoeffsB[4]
161 move.l %d3, (YDELAYB, %a5) | p->buf[YDELAYB] = %d3
169 move.l %d7, (YADAPTCOEFFSB-4,%a5) | p->buf[YADAPTCOEFFSB-1] = %d7
176 move.l %d3, (YADAPTCOEFFSB, %a5) | p->buf[YADAPTCOEFFSB] = %d3
178 | %d1, %d2, %a0, %a1, %a2 contain p->YcoeffsB[0..4]
179 | %d7, %d3 contain p->buf[YADAPTCOEFFSB-1] and p->buf[YADAPTCOEFFSB]
181 move.l (%a3), %d0 | %d0 = *decoded0
184 movem.l (YADAPTCOEFFSB-16,%a5), %d4-%d6 | %d4 = p->buf[YADAPTCOEFFSB-4]
185 | %d5 = p->buf[YADAPTCOEFFSB-3]
186 | %d6 = p->buf[YADAPTCOEFFSB-2]
| Presumably branches on the sign of *decoded0, using the flags left by the
| move.l into %d0 above. NOTE(review): label 1 is not visible in this
| listing - confirm.
188 bmi.s 1f | flags still valid here
| Coefficient update, subtracting variant: coeffs -= adapt values.
192 sub.l %d3, %d1 | %d1 = p->YcoeffsB[0] - p->buf[YADAPTCOEFFSB]
193 sub.l %d7, %d2 | %d2 = p->YcoeffsB[1] - p->buf[YADAPTCOEFFSB-1]
194 sub.l %d6, %a0 | %a0 = p->YcoeffsB[2] - p->buf[YADAPTCOEFFSB-2]
195 sub.l %d5, %a1 | %a1 = p->YcoeffsB[3] - p->buf[YADAPTCOEFFSB-3]
196 sub.l %d4, %a2 | %a2 = p->YcoeffsB[4] - p->buf[YADAPTCOEFFSB-4]
198 movem.l %d1-%d2/%a0-%a2, (YcoeffsB,%a6) | Save p->YcoeffsB[]
200 movem.l (YcoeffsA,%a6), %d4-%d7 | %d4 = p->YcoeffsA[0]
201 | %d5 = p->YcoeffsA[1]
202 | %d6 = p->YcoeffsA[2]
203 | %d7 = p->YcoeffsA[3]
205 movem.l (YADAPTCOEFFSA-12,%a5), %d2/%a0-%a2
206 | %d2 = p->buf[YADAPTCOEFFSA-3]
207 | %a0 = p->buf[YADAPTCOEFFSA-2]
208 | %a1 = p->buf[YADAPTCOEFFSA-1]
209 | %a2 = p->buf[YADAPTCOEFFSA]
211 sub.l %a2, %d4 | %d4 = p->YcoeffsA[0] - p->buf[YADAPTCOEFFSA]
212 sub.l %a1, %d5 | %d5 = p->YcoeffsA[1] - p->buf[YADAPTCOEFFSA-1]
213 sub.l %a0, %d6 | %d6 = p->YcoeffsA[2] - p->buf[YADAPTCOEFFSA-2]
214 sub.l %d2, %d7 | %d7 = p->YcoeffsA[3] - p->buf[YADAPTCOEFFSA-3]
| Coefficient update, adding variant: coeffs += adapt values.
| NOTE(review): this mirrors the subtracting sequence above, but no label or
| branch separating the two variants is visible in this listing - confirm.
220 add.l %d3, %d1 | %d1 = p->YcoeffsB[0] + p->buf[YADAPTCOEFFSB]
221 add.l %d7, %d2 | %d2 = p->YcoeffsB[1] + p->buf[YADAPTCOEFFSB-1]
222 add.l %d6, %a0 | %a0 = p->YcoeffsB[2] + p->buf[YADAPTCOEFFSB-2]
223 add.l %d5, %a1 | %a1 = p->YcoeffsB[3] + p->buf[YADAPTCOEFFSB-3]
224 add.l %d4, %a2 | %a2 = p->YcoeffsB[4] + p->buf[YADAPTCOEFFSB-4]
226 movem.l %d1-%d2/%a0-%a2, (YcoeffsB,%a6) | Save p->YcoeffsB[]
228 movem.l (YcoeffsA,%a6), %d4-%d7 | %d4 = p->YcoeffsA[0]
229 | %d5 = p->YcoeffsA[1]
230 | %d6 = p->YcoeffsA[2]
231 | %d7 = p->YcoeffsA[3]
233 movem.l (YADAPTCOEFFSA-12,%a5), %d2/%a0-%a2
234 | %d2 = p->buf[YADAPTCOEFFSA-3]
235 | %a0 = p->buf[YADAPTCOEFFSA-2]
236 | %a1 = p->buf[YADAPTCOEFFSA-1]
237 | %a2 = p->buf[YADAPTCOEFFSA]
239 add.l %a2, %d4 | %d4 = p->YcoeffsA[0] + p->buf[YADAPTCOEFFSA]
240 add.l %a1, %d5 | %d5 = p->YcoeffsA[1] + p->buf[YADAPTCOEFFSA-1]
241 add.l %a0, %d6 | %d6 = p->YcoeffsA[2] + p->buf[YADAPTCOEFFSA-2]
242 add.l %d2, %d7 | %d7 = p->YcoeffsA[3] + p->buf[YADAPTCOEFFSA-3]
245 movem.l %d4-%d7, (YcoeffsA,%a6) | Save p->YcoeffsA[]
250 movclr.l %acc0, %d1 | %d1 = predictionA
251 movclr.l %acc1, %d2 | %d2 = predictionB
253 add.l %d2, %d1 | %d1 += %d2 (NOTE(review): old comment said += (%d2 >> 1); no shift visible)
255 asr.l #2, %d1 | %d1 >>= 2 (NOTE(review): old comment said >>= 10; only this shift is visible)
256 add.l %d0, %d1 | %d1 += %d0
257 move.l %d1, (YlastA,%a6) | p->YlastA = %d1
259 move.l (YfilterA,%a6), %d2 | %d2 = p->YfilterA
262 sub.l %d0, %d2 | %d2 -= %d0 (NOTE(review): old comment said 31 * %d2; no shift-left visible)
263 asr.l #5, %d2 | %d2 >>= 5
265 move.l %d2, (YfilterA,%a6) | p->YfilterA = %d2
267 | *decoded0 stored 2 instructions down, avoiding pipeline stall
269 | ***** PREDICTOR X *****
271 | Predictor X, Filter A
273 move.l (XlastA,%a6), %d3 | %d3 = p->XlastA
275 move.l %d2, (%a3)+ | *(decoded0++) = %d2 (p->YfilterA)
277 movem.l (XDELAYA-12,%a5), %d0-%d2 | %d0 = p->buf[XDELAYA-3]
278 | %d1 = p->buf[XDELAYA-2]
279 | %d2 = p->buf[XDELAYA-1]
281 move.l %d3, (XDELAYA,%a5) | p->buf[XDELAYA] = %d3
284 neg.l %d2 | %d2 = -%d2 (NOTE(review): old comment said %d3 - %d2; no sub.l visible)
286 move.l %d2, (XDELAYA-4,%a5) | p->buf[XDELAYA-1] = %d2
288 movem.l (XcoeffsA,%a6), %d4-%d7 | %d4 = p->XcoeffsA[0]
289 | %d5 = p->XcoeffsA[1]
290 | %d6 = p->XcoeffsA[2]
291 | %d7 = p->XcoeffsA[3]
293 mac.l %d3, %d4, %acc0 | %acc0 = p->buf[XDELAYA] * p->XcoeffsA[0]
294 mac.l %d2, %d5, %acc0 | %acc0 += p->buf[XDELAYA-1] * p->XcoeffsA[1]
295 mac.l %d1, %d6, %acc0 | %acc0 += p->buf[XDELAYA-2] * p->XcoeffsA[2]
296 mac.l %d0, %d7, %acc0 | %acc0 += p->buf[XDELAYA-3] * p->XcoeffsA[3]
| Build -1 (N clear) or +1 (N set) in %d2 from the current N flag.
| NOTE(review): no tst.l is visible here, so which value set the flags
| tested by spl.b cannot be told from this listing - confirm.
300 spl.b %d2 | pos: 0x??????ff, neg: 0x??????00
301 extb.l %d2 | pos: 0xffffffff, neg: 0x00000000
302 or.l #1, %d2 | pos: 0xffffffff, neg: 0x00000001
304 move.l %d2, (XADAPTCOEFFSA-4,%a5) | p->buf[XADAPTCOEFFSA-1] = %d2
312 move.l %d3, (XADAPTCOEFFSA,%a5) | p->buf[XADAPTCOEFFSA] = %d3
314 | Predictor X, Filter B
316 movem.l (XfilterB,%a6), %d2-%d3 | %d2 = p->XfilterB, %d3 = p->YfilterA (next member)
318 move.l %d3, (XfilterB,%a6) | p->XfilterB = %d3
320 move.l %d2, %d1 | %d1 = %d2
321 lsl.l #5, %d2 | %d2 = %d2 * 32
322 sub.l %d1, %d2 | %d2 -= %d1 (== 31 * old_d2)
323 asr.l #5, %d2 | %d2 >>= 5
324 sub.l %d2, %d3 | %d3 -= %d2
326 movem.l (XDELAYB-16,%a5), %d4-%d7 | %d4 = p->buf[XDELAYB-4]
327 | %d5 = p->buf[XDELAYB-3]
328 | %d6 = p->buf[XDELAYB-2]
329 | %d7 = p->buf[XDELAYB-1]
331 neg.l %d7 | %d7 = -%d7 (NOTE(review): old comment said %d3 - %d7; no sub.l visible)
333 move.l %d7, (XDELAYB-4,%a5) | p->buf[XDELAYB-1] = %d7
335 movem.l (XcoeffsB,%a6), %d1-%d2/%a0-%a2 | %d1 = p->XcoeffsB[0]
336 | %d2 = p->XcoeffsB[1]
337 | %a0 = p->XcoeffsB[2]
338 | %a1 = p->XcoeffsB[3]
339 | %a2 = p->XcoeffsB[4]
341 mac.l %d3, %d1, %acc1 | %acc1 = p->buf[XDELAYB] * p->XcoeffsB[0]
342 mac.l %d7, %d2, %acc1 | %acc1 += p->buf[XDELAYB-1] * p->XcoeffsB[1]
343 mac.l %d6, %a0, %acc1 | %acc1 += p->buf[XDELAYB-2] * p->XcoeffsB[2]
344 mac.l %d5, %a1, %acc1 | %acc1 += p->buf[XDELAYB-3] * p->XcoeffsB[3]
345 mac.l %d4, %a2, %acc1 | %acc1 += p->buf[XDELAYB-4] * p->XcoeffsB[4]
347 move.l %d3, (XDELAYB, %a5) | p->buf[XDELAYB] = %d3
355 move.l %d7, (XADAPTCOEFFSB-4,%a5) | p->buf[XADAPTCOEFFSB-1] = %d7
363 move.l %d3, (XADAPTCOEFFSB, %a5) | p->buf[XADAPTCOEFFSB] = %d3
365 | %d1, %d2, %a0, %a1, %a2 contain p->XcoeffsB[0..4]
366 | %d7, %d3 contain p->buf[XADAPTCOEFFSB-1] and p->buf[XADAPTCOEFFSB]
368 move.l (%a4), %d0 | %d0 = *decoded1
371 movem.l (XADAPTCOEFFSB-16,%a5), %d4-%d6 | %d4 = p->buf[XADAPTCOEFFSB-4]
372 | %d5 = p->buf[XADAPTCOEFFSB-3]
373 | %d6 = p->buf[XADAPTCOEFFSB-2]
| Presumably branches on the sign of *decoded1, using the flags left by the
| move.l into %d0 above. NOTE(review): label 1 is not visible in this
| listing - confirm.
375 bmi.s 1f | flags still valid here
| Coefficient update, subtracting variant: coeffs -= adapt values.
379 sub.l %d3, %d1 | %d1 = p->XcoeffsB[0] - p->buf[XADAPTCOEFFSB]
380 sub.l %d7, %d2 | %d2 = p->XcoeffsB[1] - p->buf[XADAPTCOEFFSB-1]
381 sub.l %d6, %a0 | %a0 = p->XcoeffsB[2] - p->buf[XADAPTCOEFFSB-2]
382 sub.l %d5, %a1 | %a1 = p->XcoeffsB[3] - p->buf[XADAPTCOEFFSB-3]
383 sub.l %d4, %a2 | %a2 = p->XcoeffsB[4] - p->buf[XADAPTCOEFFSB-4]
385 movem.l %d1-%d2/%a0-%a2, (XcoeffsB,%a6) | Save p->XcoeffsB[]
387 movem.l (XcoeffsA,%a6), %d4-%d7 | %d4 = p->XcoeffsA[0]
388 | %d5 = p->XcoeffsA[1]
389 | %d6 = p->XcoeffsA[2]
390 | %d7 = p->XcoeffsA[3]
392 movem.l (XADAPTCOEFFSA-12,%a5), %d2/%a0-%a2
393 | %d2 = p->buf[XADAPTCOEFFSA-3]
394 | %a0 = p->buf[XADAPTCOEFFSA-2]
395 | %a1 = p->buf[XADAPTCOEFFSA-1]
396 | %a2 = p->buf[XADAPTCOEFFSA]
398 sub.l %a2, %d4 | %d4 = p->XcoeffsA[0] - p->buf[XADAPTCOEFFSA]
399 sub.l %a1, %d5 | %d5 = p->XcoeffsA[1] - p->buf[XADAPTCOEFFSA-1]
400 sub.l %a0, %d6 | %d6 = p->XcoeffsA[2] - p->buf[XADAPTCOEFFSA-2]
401 sub.l %d2, %d7 | %d7 = p->XcoeffsA[3] - p->buf[XADAPTCOEFFSA-3]
| Coefficient update, adding variant: coeffs += adapt values.
| NOTE(review): this mirrors the subtracting sequence above, but no label or
| branch separating the two variants is visible in this listing - confirm.
407 add.l %d3, %d1 | %d1 = p->XcoeffsB[0] + p->buf[XADAPTCOEFFSB]
408 add.l %d7, %d2 | %d2 = p->XcoeffsB[1] + p->buf[XADAPTCOEFFSB-1]
409 add.l %d6, %a0 | %a0 = p->XcoeffsB[2] + p->buf[XADAPTCOEFFSB-2]
410 add.l %d5, %a1 | %a1 = p->XcoeffsB[3] + p->buf[XADAPTCOEFFSB-3]
411 add.l %d4, %a2 | %a2 = p->XcoeffsB[4] + p->buf[XADAPTCOEFFSB-4]
413 movem.l %d1-%d2/%a0-%a2, (XcoeffsB,%a6) | Save p->XcoeffsB[]
415 movem.l (XcoeffsA,%a6), %d4-%d7 | %d4 = p->XcoeffsA[0]
416 | %d5 = p->XcoeffsA[1]
417 | %d6 = p->XcoeffsA[2]
418 | %d7 = p->XcoeffsA[3]
420 movem.l (XADAPTCOEFFSA-12,%a5), %d2/%a0-%a2
421 | %d2 = p->buf[XADAPTCOEFFSA-3]
422 | %a0 = p->buf[XADAPTCOEFFSA-2]
423 | %a1 = p->buf[XADAPTCOEFFSA-1]
424 | %a2 = p->buf[XADAPTCOEFFSA]
426 add.l %a2, %d4 | %d4 = p->XcoeffsA[0] + p->buf[XADAPTCOEFFSA]
427 add.l %a1, %d5 | %d5 = p->XcoeffsA[1] + p->buf[XADAPTCOEFFSA-1]
428 add.l %a0, %d6 | %d6 = p->XcoeffsA[2] + p->buf[XADAPTCOEFFSA-2]
429 add.l %d2, %d7 | %d7 = p->XcoeffsA[3] + p->buf[XADAPTCOEFFSA-3]
432 movem.l %d4-%d7, (XcoeffsA,%a6) | Save p->XcoeffsA[]
437 movclr.l %acc0, %d1 | %d1 = predictionA
438 movclr.l %acc1, %d2 | %d2 = predictionB
440 add.l %d2, %d1 | %d1 += %d2 (NOTE(review): old comment said += (%d2 >> 1); no shift visible)
442 asr.l #2, %d1 | %d1 >>= 2 (NOTE(review): old comment said >>= 10; only this shift is visible)
443 add.l %d0, %d1 | %d1 += %d0
444 move.l %d1, (XlastA,%a6) | p->XlastA = %d1
446 move.l (XfilterA,%a6), %d2 | %d2 = p->XfilterA
449 sub.l %d0, %d2 | %d2 -= %d0 (NOTE(review): old comment said 31 * %d2; no shift-left visible)
450 asr.l #5, %d2 | %d2 >>= 5
452 move.l %d2, (XfilterA,%a6) | p->XfilterA = %d2
454 | *decoded1 stored 3 instructions down, avoiding pipeline stall
458 addq.l #4, %a5 | p->buf++
459 lea.l (historybuffer+PREDICTOR_HISTORY_SIZE*4,%a6), %a2
460 | %a2 = &p->historybuffer[PREDICTOR_HISTORY_SIZE]
462 move.l %d2, (%a4)+ | *(decoded1++) = %d2 (p->XfilterA)
| NOTE(review): no compare of %a5 against %a2 is visible before this branch,
| and the .move_hist label is not visible in this listing - confirm.
465 beq.s .move_hist | History buffer is full, we need to do a memmove
467 subq.l #1, (%sp) | decrease loop count
| Exit sequence: write back the buffer cursor, restore registers, drop frame.
| NOTE(review): neither a loop-back branch nor a return instruction is
| visible here - confirm.
471 move.l %a5, (%a6) | Save value of p->buf
472 movem.l (4,%sp), %d2-%d7/%a2-%a6
473 lea.l (12*4,%sp), %sp
| History move: copy 200 bytes (5 blocks of 10 registers = 40 bytes each)
| from the current buffer position (%a5) to the start of p->historybuffer.
477 lea.l (historybuffer,%a6), %a2
479 | dest = %a2 (p->historybuffer)
483 movem.l (%a5), %d0-%d7/%a0-%a1 | 40 bytes
484 movem.l %d0-%d7/%a0-%a1, (%a2)
485 movem.l (40,%a5), %d0-%d7/%a0-%a1 | 40 bytes
486 movem.l %d0-%d7/%a0-%a1, (40,%a2)
487 movem.l (80,%a5), %d0-%d7/%a0-%a1 | 40 bytes
488 movem.l %d0-%d7/%a0-%a1, (80,%a2)
489 movem.l (120,%a5), %d0-%d7/%a0-%a1 | 40 bytes
490 movem.l %d0-%d7/%a0-%a1, (120,%a2)
491 movem.l (160,%a5), %d0-%d7/%a0-%a1 | 40 bytes
492 movem.l %d0-%d7/%a0-%a1, (160,%a2)
494 move.l %a2, %a5 | p->buf = &p->historybuffer[0]
496 subq.l #1, (%sp) | decrease loop count
500 .size predictor_decode_stereo, .-predictor_decode_stereo
| ---------------------------------------------------------------------------
| predictor_decode_mono
|
| Single-channel variant: runs the Y filter A predictor on the value at
| decoded0, accumulating the filter sum in %acc0 of the MAC unit.
|
| Stack arguments (offsets at entry): 4 = p, 8 = decoded0, 12 = count.
|
| Register roles once set up:
|   %a6 = p (struct predictor_t*)
|   %a5 = p->buf (advanced by 4 bytes via addq.l, written back to p->buf
|         before the registers are restored)
|   %a4 = decoded0 cursor (post-incremented)
|   %d7 = remaining count
|   %d3 = p->YlastA, kept in a register; stored to the struct before the
|         exit sequence and before the history move, and reloaded after the
|         move because the block copy overwrites %d3
| %d2-%d7/%a2-%a6 are saved at (%sp) and restored before the frame is
| released.
|
| NOTE(review): in this listing the targets of bmi.s 1f and
| beq.s .move_histm, the loop-back branch and the return instruction are not
| visible, and several comments describe arithmetic whose instructions are
| not visible either. Such spots are marked NOTE(review) below - confirm
| against the complete file.
| ---------------------------------------------------------------------------
503 .global predictor_decode_mono
504 .type predictor_decode_mono,@function
506 | void predictor_decode_mono(struct predictor_t* p,
510 predictor_decode_mono:
511 lea.l (-11*4,%sp), %sp | frame: 11 saved registers
512 movem.l %d2-%d7/%a2-%a6, (%sp)
514 move.l #0, %macsr | signed integer mode
516 move.l (11*4+4,%sp), %a6 | %a6 = p
517 move.l (11*4+8,%sp), %a4 | %a4 = decoded0
518 move.l (11*4+12,%sp), %d7 | %d7 = count
519 move.l (%a6), %a5 | %a5 = p->buf
521 move.l (YlastA,%a6), %d3 | %d3 = p->YlastA
525 | ***** PREDICTOR *****
527 movem.l (YDELAYA-12,%a5), %d0-%d2 | %d0 = p->buf[YDELAYA-3]
528 | %d1 = p->buf[YDELAYA-2]
529 | %d2 = p->buf[YDELAYA-1]
531 move.l %d3, (YDELAYA,%a5) | p->buf[YDELAYA] = %d3
534 neg.l %d2 | %d2 = -%d2 (NOTE(review): old comment said %d3 - %d2; no sub.l visible)
536 move.l %d2, (YDELAYA-4,%a5) | p->buf[YDELAYA-1] = %d2
538 movem.l (YcoeffsA,%a6), %a0-%a3 | %a0 = p->YcoeffsA[0]
539 | %a1 = p->YcoeffsA[1]
540 | %a2 = p->YcoeffsA[2]
541 | %a3 = p->YcoeffsA[3]
543 mac.l %d3, %a0, %acc0 | %acc0 = p->buf[YDELAYA] * p->YcoeffsA[0]
544 mac.l %d2, %a1, %acc0 | %acc0 += p->buf[YDELAYA-1] * p->YcoeffsA[1]
545 mac.l %d1, %a2, %acc0 | %acc0 += p->buf[YDELAYA-2] * p->YcoeffsA[2]
546 mac.l %d0, %a3, %acc0 | %acc0 += p->buf[YDELAYA-3] * p->YcoeffsA[3]
| Build -1 (N clear) or +1 (N set) in %d2 from the current N flag.
| NOTE(review): no tst.l is visible here, so which value set the flags
| tested by spl.b cannot be told from this listing - confirm.
550 spl.b %d2 | pos: 0x??????ff, neg: 0x??????00
551 extb.l %d2 | pos: 0xffffffff, neg: 0x00000000
552 or.l #1, %d2 | pos: 0xffffffff, neg: 0x00000001
554 move.l %d2, (YADAPTCOEFFSA-4,%a5) | p->buf[YADAPTCOEFFSA-1] = %d2
562 move.l %d3, (YADAPTCOEFFSA,%a5) | p->buf[YADAPTCOEFFSA] = %d3
564 move.l (%a4), %d0 | %d0 = *decoded0
567 movem.l (YADAPTCOEFFSA-12,%a5),%d4-%d5 | %d4 = p->buf[YADAPTCOEFFSA-3]
568 | %d5 = p->buf[YADAPTCOEFFSA-2]
| Presumably branches on the sign of *decoded0, using the flags left by the
| move.l into %d0 above. NOTE(review): label 1 is not visible in this
| listing - confirm.
570 bmi.s 1f | flags still valid here
| Coefficient update, subtracting variant: coeffs -= adapt values.
574 sub.l %d3, %a0 | %a0 = p->YcoeffsA[0] - p->buf[YADAPTCOEFFSA]
575 sub.l %d2, %a1 | %a1 = p->YcoeffsA[1] - p->buf[YADAPTCOEFFSA-1]
576 sub.l %d5, %a2 | %a2 = p->YcoeffsA[2] - p->buf[YADAPTCOEFFSA-2]
577 sub.l %d4, %a3 | %a3 = p->YcoeffsA[3] - p->buf[YADAPTCOEFFSA-3]
| Coefficient update, adding variant: coeffs += adapt values.
| NOTE(review): this mirrors the subtracting sequence above, but no label or
| branch separating the two variants is visible in this listing - confirm.
583 add.l %d3, %a0 | %a0 = p->YcoeffsA[0] + p->buf[YADAPTCOEFFSA]
584 add.l %d2, %a1 | %a1 = p->YcoeffsA[1] + p->buf[YADAPTCOEFFSA-1]
585 add.l %d5, %a2 | %a2 = p->YcoeffsA[2] + p->buf[YADAPTCOEFFSA-2]
586 add.l %d4, %a3 | %a3 = p->YcoeffsA[3] + p->buf[YADAPTCOEFFSA-3]
589 movem.l %a0-%a3, (YcoeffsA,%a6) | save p->YcoeffsA[]
594 movclr.l %acc0, %d3 | %d3 = predictionA
596 asr.l #2, %d3 | %d3 >>= 2 (NOTE(review): old comment said >>= 10; only this shift is visible)
597 add.l %d0, %d3 | %d3 += %d0
599 move.l (YfilterA,%a6), %d2 | %d2 = p->YfilterA
602 sub.l %d0, %d2 | %d2 -= %d0 (NOTE(review): old comment said 31 * %d2; no shift-left visible)
603 asr.l #5, %d2 | %d2 >>= 5
605 move.l %d2, (YfilterA,%a6) | p->YfilterA = %d2
607 | *decoded0 stored 3 instructions down, avoiding pipeline stall
611 addq.l #4, %a5 | p->buf++
612 lea.l (historybuffer+PREDICTOR_HISTORY_SIZE*4,%a6), %a3
613 | %a3 = &p->historybuffer[PREDICTOR_HISTORY_SIZE]
615 move.l %d2, (%a4)+ | *(decoded0++) = %d2 (p->YfilterA)
| NOTE(review): no compare of %a5 against %a3 is visible before this branch,
| and the .move_histm label is not visible in this listing - confirm.
618 beq.s .move_histm | History buffer is full, we need to do a memmove
620 subq.l #1, %d7 | decrease loop count
| Exit sequence: store YlastA and the buffer cursor, restore registers,
| drop frame. NOTE(review): neither a loop-back branch nor a return
| instruction is visible here - confirm.
623 move.l %d3, (YlastA,%a6) | p->YlastA = %d3
626 move.l %a5, (%a6) | Save value of p->buf
627 movem.l (%sp), %d2-%d7/%a2-%a6
628 lea.l (11*4,%sp), %sp
| History move: %d3 is stored first because the block copy below uses it as
| a scratch register. Copies 200 bytes (5 blocks of 10 registers = 40 bytes
| each) from the current buffer position (%a5) to p->historybuffer; %d7
| (loop count) and %a3 (destination) are not part of the copy register set.
632 move.l %d3, (YlastA,%a6) | p->YlastA = %d3
634 lea.l (historybuffer,%a6), %a3
636 | dest = %a3 (p->historybuffer)
640 movem.l (%a5), %d0-%d6/%a0-%a2 | 40 bytes
641 movem.l %d0-%d6/%a0-%a2, (%a3)
642 movem.l (40,%a5), %d0-%d6/%a0-%a2 | 40 bytes
643 movem.l %d0-%d6/%a0-%a2, (40,%a3)
644 movem.l (80,%a5), %d0-%d6/%a0-%a2 | 40 bytes
645 movem.l %d0-%d6/%a0-%a2, (80,%a3)
646 movem.l (120,%a5), %d0-%d6/%a0-%a2 | 40 bytes
647 movem.l %d0-%d6/%a0-%a2, (120,%a3)
648 movem.l (160,%a5), %d0-%d6/%a0-%a2 | 40 bytes
649 movem.l %d0-%d6/%a0-%a2, (160,%a3)
651 move.l %a3, %a5 | p->buf = &p->historybuffer[0]
653 move.l (YlastA,%a6), %d3 | %d3 = p->YlastA (reload after the copy)
655 subq.l #1, %d7 | decrease loop count
659 .size predictor_decode_mono, .-predictor_decode_mono