/*
 * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>
 * Copyright (c) 2006 Mans Rullgard <mans@mansr.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
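
#include "libavutil/arm/asm.S"   /* for the function/endfunc macros; path assumes the usual FFmpeg tree layout */

#define ROW_SHIFT 11             /* scaling shifts as in FFmpeg's simple_idct C version; assumed values, */
#define COL_SHIFT 20             /* since the original defines fall outside this excerpt */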
#define W1 22725 /* cos(1*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
#define W2 21407 /* cos(2*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
#define W3 19266 /* cos(3*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
#define W4 16383 /* cos(4*M_PI/16)*sqrt(2)*(1<<14) = 16384; clipped to (1<<14)-1, see idct_col */
#define W5 12873 /* cos(5*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
#define W6 8867  /* cos(6*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
#define W7 4520  /* cos(7*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
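/* Worked example of the scaling above: cos(1*M_PI/16) = 0.98079, so
 * W1 = 0.98079 * sqrt(2) * 16384 + 0.5 = 22725. Each Wi is a coefficient
 * scaled by 1<<14, so a 16x16->32 multiply by Wi leaves a product carrying
 * 14 fractional bits. W4 alone is clipped from its exact value 16384 down
 * to (1<<14)-1 = 16383, which lets a multiply by W4 be done with a shift
 * and subtract (rsb x, x, x, lsl #14), as the column pass does below.
 */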
#define W13 (W1 | (W3 << 16))
#define W26 (W2 | (W6 << 16))
#define W57 (W5 | (W7 << 16))
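/* Each word above packs two coefficients so that a single ldr feeds the
 * armv5te dual-halfword multiplies: with ip = W13, smulbb selects the low
 * half of ip (W1) and smultb the high half (W3), while the second b/t
 * letter selects the low or high sample of a packed row/column pair.
 */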
function idct_row_armv5te
        ldrd    a3, [a1]                     /* a3 = row[1:0], a4 = row[3:2] */
        mov     v1, #(1<<(ROW_SHIFT-1))
        sub     ip, ip, #1                   /* ip = W4 */
        smlabb  v1, ip, a3, v1               /* v1 = W4*row[0] + (1<<(ROW_SHIFT-1)) */
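/* Fixed-point rounding at work: the bias added into v1 is half of the
 * final divisor, so the eventual >> ROW_SHIFT rounds to nearest instead
 * of truncating toward minus infinity.
 */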
        ldr     ip, [pc, #(w26-.-8)]         /* ip = W2 | (W6 << 16) */
        ldr     ip, [pc, #(w13-.-8)]         /* ip = W1 | (W3 << 16) */
        ldr     lr, [pc, #(w57-.-8)]         /* lr = W5 | (W7 << 16) */
        ldrd    a3, [a1, #8]                 /* a3 = row[5:4], a4 = row[7:6] */
        ldr     ip, [pc, #(w26-.-8)]         /* ip = W2 | (W6 << 16) */
        sub     a2, a2, #1                   /* a2 = W4 */
        smulbb  a2, a2, a3                   /* a2 = W4*row[4] */
        smultb  lr, ip, a4                   /* lr = W6*row[6] */
        add     v1, v1, a2                   /* v1 += W4*row[4] */
        add     v1, v1, lr                   /* v1 += W6*row[6] */
        add     v4, v4, a2                   /* v4 += W4*row[4] */
        sub     v4, v4, lr                   /* v4 -= W6*row[6] */
        smulbb  lr, ip, a4                   /* lr = W2*row[6] */
        sub     v2, v2, a2                   /* v2 -= W4*row[4] */
        sub     v2, v2, lr                   /* v2 -= W2*row[6] */
        sub     v3, v3, a2                   /* v3 -= W4*row[4] */
        add     v3, v3, lr                   /* v3 += W2*row[6] */
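/* v1..v4 hold the even-part accumulators at this point; each final sum is
 * shifted down by ROW_SHIFT and two 16-bit results are packed per word
 * before being stored back into the row.
 */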
        bic     a3, a3, #0x1f0000            /* clear bits 16-20 spilled by the logical shift down to 16 bits */
        add     a3, a3, a2, lsl #16          /* pack the next 16-bit result on top */
        bic     a4, a4, #0x1f0000
        add     a4, a4, a2, lsl #16
        bic     a3, a3, #0x1f0000
        add     a3, a3, a2, lsl #16
        bic     a4, a4, #0x1f0000
        add     a4, a4, a2, lsl #16
        orr     a3, a3, a3, lsl #16          /* duplicate the low halfword into both halves */
        ldr     a4, [a1]                     /* a4 = col[1:0] */
        sub     ip, ip, #1                   /* ip = W4 */
#if 0
        mov     v1, #(1<<(COL_SHIFT-1))
        smlabt  v2, ip, a4, v1               /* v2 = W4*col[1] + (1<<(COL_SHIFT-1)) */
        smlabb  v1, ip, a4, v1               /* v1 = W4*col[0] + (1<<(COL_SHIFT-1)) */
        ldr     a4, [a1, #(16*4)]
#else
        mov     v1, #((1<<(COL_SHIFT-1))/W4) /* this matches the C version */
        add     v2, v1, a4, asr #16
        rsb     v2, v2, v2, lsl #14
        mov     a4, a4, lsl #16              /* sign-extend col[0]: lsl #16 then asr #16 */
        add     v1, v1, a4, asr #16
        ldr     a4, [a1, #(16*4)]
        rsb     v1, v1, v1, lsl #14
#endif
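/* The rsb lines multiply by W4 without a multiplier: W4 = (1<<14)-1, so
 * (v << 14) - v = v * W4. The bias is pre-divided by W4, so after the
 * multiply it contributes roughly (1<<(COL_SHIFT-1)), reproducing the
 * rounding of the C implementation (hence "matches the C version" above).
 */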
        ldr     ip, [pc, #(w26-.-8)]
        ldr     a4, [a1, #(16*2)]            /* row 2 of both columns */
        ldr     a4, [a1, #(16*6)]            /* row 6 of both columns */
        stmfd   sp!, {v1, v2, v3, v4, v5, v6, v7, fp}  /* spill the even part while the odd part is built */
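/* With the even part spilled, v1..fp are reused as the eight odd-part
 * accumulators (four outputs times two columns), built from rows 1, 3, 5
 * and 7 using the packed W13/W57 coefficient words loaded below.
 */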
        ldr     ip, [pc, #(w13-.-8)]
        ldr     a4, [a1, #(16*1)]            /* row 1 of both columns */
        ldr     lr, [pc, #(w57-.-8)]
        ldr     a4, [a1, #(16*3)]            /* row 3 of both columns */
        smlatb  v1, ip, a4, v1
        smlatb  v3, lr, a4, v3
        smlatt  v2, ip, a4, v2
        smlatt  v4, lr, a4, v4
        ldr     a4, [a1, #(16*5)]            /* row 5 of both columns */
        smlabb  v1, lr, a4, v1
        smlabb  v3, ip, a4, v3
        smlatb  v5, lr, a4, v5
        smlatb  v7, ip, a4, v7
        smlabt  v2, lr, a4, v2
        smlabt  v4, ip, a4, v4
        smlatt  v6, lr, a4, v6
        ldr     a3, [a1, #(16*7)]            /* row 7 of both columns */
        smlatt  fp, ip, a4, fp
        smlatb  v1, lr, a3, v1
        smlabb  v3, lr, a3, v3
        smlatb  v5, ip, a3, v5
        smlatt  v2, lr, a3, v2
        smlabt  v4, lr, a3, v4
        smlatt  v6, ip, a3, v6
function idct_col_armv5te
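/* Column results are brought down with a logical shift by COL_SHIFT instead
 * of an asr: the flag-setting arithmetic before each orrmi below lets it
 * patch the sign bits 12-15 (#0xf000) back in when the value is negative,
 * giving a correct 16-bit two's-complement halfword for packing.
 */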
        orrmi   a2, a2, #0xf000              /* restore sign bits 12-15 for negative results */
        orr     a2, a2, ip, lsl #16          /* pack the second column's halfword on top */
        orrmi   a2, a2, #0xf000
        orr     a2, a2, a4, lsl #16
        str     a2, [a1, #(16*7)]
        orrmi   a2, a2, #0xf000
        orr     a2, a2, ip, lsl #16
        str     a2, [a1, #(16*1)]
        orrmi   a2, a2, #0xf000
        orr     a2, a2, a4, lsl #16
        str     a2, [a1, #(16*6)]
        orrmi   a2, a2, #0xf000
        orr     a2, a2, ip, lsl #16
        str     a2, [a1, #(16*2)]
        orrmi   a2, a2, #0xf000
        orr     a2, a2, a4, lsl #16
        str     a2, [a1, #(16*5)]
        orrmi   a2, a2, #0xf000
        orr     a2, a2, ip, lsl #16
        str     a2, [a1, #(16*3)]
        orrmi   a2, a2, #0xf000
        orr     a2, a2, a4, lsl #16
        str     a2, [a1, #(16*4)]
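/* Per FFmpeg's idct_put convention, the put variant writes finished pixels
 * straight to the destination: each result is clamped to 0..255 and the
 * orr ..., lsl #8 sequences below pack four such bytes per word store.
 */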
function idct_col_put_armv5te
        orr     a2, a2, ip, lsl #8
        orr     a2, a3, a4, lsl #8
        rsb     v2, lr, lr, lsl #3           /* v2 = 7*lr */
        orr     a2, a2, ip, lsl #8
        orr     a2, a2, a4, lsl #8
        orr     a2, a2, ip, lsl #8
        orr     a2, a2, a4, lsl #8
        orr     a2, a2, ip, lsl #8
        orr     a2, a2, a4, lsl #8
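/* The add variant follows the idct_add convention: the IDCT output is added
 * to the prediction bytes already in the destination, saturated to 0..255,
 * and repacked, so the adds/orr pairs below mirror the put path with an
 * extra accumulate step.
 */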
function idct_col_add_armv5te
        adds    v1, v1, ip, lsr #8
        orr     a2, a2, v1, lsl #8
        rsb     v2, v1, v1, lsl #3           /* v2 = 7*v1 */
        adds    a4, a4, ip, lsr #8
        orr     a2, a3, a4, lsl #8
        adds    v3, v3, ip, lsr #8
        orr     a2, a2, v3, lsl #8
        adds    a4, a4, ip, lsr #8
        orr     a2, a3, a4, lsl #8
        adds    v3, v3, ip, lsr #8
        orr     a2, a2, v3, lsl #8
        adds    a4, a4, ip, lsr #8
        orr     a2, a3, a4, lsl #8
        adds    v3, v3, ip, lsr #8
        orr     a2, a2, v3, lsl #8
        adds    a4, a4, ip, lsr #8
        orr     a2, a3, a4, lsl #8
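/* Top-level entry points. Each driver runs idct_row_armv5te across the eight
 * rows of the coefficient block, then one of the idct_col_* routines four
 * times, two columns per call; export=1 makes the symbol visible to the C
 * code that installs the IDCT.
 */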
function simple_idct_armv5te, export=1
        stmfd   sp!, {v1, v2, v3, v4, v5, v6, v7, fp, lr}
        ldmfd   sp!, {v1, v2, v3, v4, v5, v6, v7, fp, pc}
function simple_idct_add_armv5te, export=1
        stmfd   sp!, {a1, a2, v1, v2, v3, v4, v5, v6, v7, fp, lr}
        bl      idct_col_add_armv5te
        add     a1, a1, #4                   /* advance to the next two columns */
        bl      idct_col_add_armv5te
        add     a1, a1, #4
        bl      idct_col_add_armv5te
        add     a1, a1, #4
        bl      idct_col_add_armv5te
        add     sp, sp, #8                   /* drop the a1/a2 saved at entry */
        ldmfd   sp!, {v1, v2, v3, v4, v5, v6, v7, fp, pc}
function simple_idct_put_armv5te, export=1
        stmfd   sp!, {a1, a2, v1, v2, v3, v4, v5, v6, v7, fp, lr}
        bl      idct_col_put_armv5te
        add     a1, a1, #4                   /* advance to the next two columns */
        bl      idct_col_put_armv5te
        add     a1, a1, #4
        bl      idct_col_put_armv5te
        add     a1, a1, #4
        bl      idct_col_put_armv5te
        add     sp, sp, #8                   /* drop the a1/a2 saved at entry */
        ldmfd   sp!, {v1, v2, v3, v4, v5, v6, v7, fp, pc}