/*
 * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>
 * Copyright (c) 2006 Mans Rullgard <mans@mansr.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
#define W1  22725   /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
#define W2  21407   /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
#define W3  19266   /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
#define W4  16383   /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
#define W5  12873   /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
#define W6  8867    /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
#define W7  4520    /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
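
/*
 * Each Wi above is round(cos(i*M_PI/16) * sqrt(2) * (1<<14)); e.g. for i = 2,
 * cos(2*M_PI/16) * sqrt(2) * 16384 ~= 21406.7, giving W2 = 21407.  The one
 * exception is W4: the formula yields 16384, but 16383 = (1<<14)-1 is used,
 * apparently to match the C simple_idct; it also lets a W4 multiplication be
 * done as "(x << 14) - x" (see the rsb ..., lsl #14 trick in the column code).
 */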
#define W13 (W1 | (W3 << 16))
#define W26 (W2 | (W6 << 16))
#define W57 (W5 | (W7 << 16))
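
/*
 * Coefficients are paired in single registers so that the ARMv5TE halfword
 * multiplies (smulxy/smlaxy) can select either one with their B/T operand
 * specifiers: with W26, smulbb multiplies by W2 and smultb by W6.
 */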
        .type   idct_row_armv5te, %function
        .func   idct_row_armv5te
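/*
 * 1-D IDCT of a single row, roughly as in the C simple_idct:
 *
 *   a0 = W4*row[0] + W2*row[2] + W4*row[4] + W6*row[6] + bias
 *   a1 = W4*row[0] + W6*row[2] - W4*row[4] - W2*row[6] + bias
 *   a2 = W4*row[0] - W6*row[2] - W4*row[4] + W2*row[6] + bias
 *   a3 = W4*row[0] - W2*row[2] + W4*row[4] - W6*row[6] + bias
 *   b0..b3 = +/- W1/W3/W5/W7 combinations of row[1], row[3], row[5], row[7]
 *   row[i] = (a_i + b_i) >> ROW_SHIFT,  row[7-i] = (a_i - b_i) >> ROW_SHIFT
 *
 * where bias = 1 << (ROW_SHIFT-1).  Below, v1..v4 accumulate a0..a3.
 */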
        ldrd    a3, [a1]                /* a3 = row[1:0], a4 = row[3:2] */
        mov     v1, #(1<<(ROW_SHIFT-1))
        sub     ip, ip, #1              /* ip = W4 */
        smlabb  v1, ip, a3, v1          /* v1 = W4*row[0] + (1<<(ROW_SHIFT-1)) */
        ldr     ip, [pc, #(w26-.-8)]    /* ip = W2 | (W6 << 16) */
        ldr     ip, [pc, #(w13-.-8)]    /* ip = W1 | (W3 << 16) */
        ldr     lr, [pc, #(w57-.-8)]    /* lr = W5 | (W7 << 16) */
        ldrd    a3, [a1, #8]            /* a3 = row[5:4], a4 = row[7:6] */
        ldr     ip, [pc, #(w26-.-8)]    /* ip = W2 | (W6 << 16) */
        sub     a2, a2, #1              /* a2 = W4 */
        smulbb  a2, a2, a3              /* a2 = W4*row[4] */
        smultb  lr, ip, a4              /* lr = W6*row[6] */
        add     v1, v1, a2              /* v1 += W4*row[4] */
        add     v1, v1, lr              /* v1 += W6*row[6] */
        add     v4, v4, a2              /* v4 += W4*row[4] */
        sub     v4, v4, lr              /* v4 -= W6*row[6] */
        smulbb  lr, ip, a4              /* lr = W2*row[6] */
        sub     v2, v2, a2              /* v2 -= W4*row[4] */
        sub     v2, v2, lr              /* v2 -= W2*row[6] */
        sub     v3, v3, a2              /* v3 -= W4*row[4] */
        add     v3, v3, lr              /* v3 += W2*row[6] */
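
/*
 * Combine the even and odd halves, shift right by ROW_SHIFT and repack the
 * results two 16-bit values per word; the bic clears the stray bits left
 * above bit 15 by the shift before the second value is merged in.
 */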
        bic     a3, a3, #0x1f0000       /* clear bits 16-20 left by the shift */
        add     a3, a3, a2, lsl #16     /* pack the next result into the high half */
        bic     a4, a4, #0x1f0000
        add     a4, a4, a2, lsl #16
        bic     a3, a3, #0x1f0000
        add     a3, a3, a2, lsl #16
        bic     a4, a4, #0x1f0000
        add     a4, a4, a2, lsl #16
        orr     a3, a3, a3, lsl #16     /* duplicate the halfword */
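
/*
 * Column pass: the code below works on two adjacent columns at a time,
 * loading one 32-bit word (a pair of int16 elements, referred to as the
 * "lo" and "hi" halves in the comments) per line of the block.
 */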
        ldr     a4, [a1]                /* a4 = col[1:0] */
        sub     ip, ip, #1              /* ip = W4 */
#if 0
        mov     v1, #(1<<(COL_SHIFT-1))
        smlabt  v2, ip, a4, v1          /* v2 = W4*col[1] + (1<<(COL_SHIFT-1)) */
        smlabb  v1, ip, a4, v1          /* v1 = W4*col[0] + (1<<(COL_SHIFT-1)) */
        ldr     a4, [a1, #(16*4)]
#else
        mov     v1, #((1<<(COL_SHIFT-1))/W4) /* this matches the C version */
        add     v2, v1, a4, asr #16     /* v2 = bias + col[1] */
        rsb     v2, v2, v2, lsl #14     /* v2 *= (1<<14)-1 = W4 */
        mov     a4, a4, lsl #16
        add     v1, v1, a4, asr #16     /* v1 = bias + col[0] */
        ldr     a4, [a1, #(16*4)]       /* a4 = col[4] */
        rsb     v1, v1, v1, lsl #14     /* v1 *= W4 */
#endif
        ldr     ip, [pc, #(w26-.-8)]    /* ip = W2 | (W6 << 16) */
        ldr     a4, [a1, #(16*2)]       /* a4 = col[2] */
        ldr     a4, [a1, #(16*6)]       /* a4 = col[6] */
        stmfd   sp!, {v1, v2, v3, v4, v5, v6, v7, fp}
        ldr     ip, [pc, #(w13-.-8)]    /* ip = W1 | (W3 << 16) */
        ldr     a4, [a1, #(16*1)]       /* a4 = col[1] */
        ldr     lr, [pc, #(w57-.-8)]    /* lr = W5 | (W7 << 16) */
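
/*
 * Odd part: accumulate the W1/W3/W5/W7 products of col[1], col[3], col[5]
 * and col[7].  v1, v3, v5, v7 collect the low-halfword (first) column,
 * v2, v4, v6, fp the high-halfword (second) column.
 */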
        ldr     a4, [a1, #(16*3)]       /* a4 = col[3] */
        smlatb  v1, ip, a4, v1          /* v1 += W3*col[3] (lo) */
        smlatb  v3, lr, a4, v3          /* v3 += W7*col[3] (lo) */
        smlatt  v2, ip, a4, v2          /* v2 += W3*col[3] (hi) */
        smlatt  v4, lr, a4, v4          /* v4 += W7*col[3] (hi) */
        ldr     a4, [a1, #(16*5)]       /* a4 = col[5] */
        smlabb  v1, lr, a4, v1          /* v1 += W5*col[5] (lo) */
        smlabb  v3, ip, a4, v3          /* v3 += W1*col[5] (lo) */
        smlatb  v5, lr, a4, v5          /* v5 += W7*col[5] (lo) */
        smlatb  v7, ip, a4, v7          /* v7 += W3*col[5] (lo) */
        smlabt  v2, lr, a4, v2          /* v2 += W5*col[5] (hi) */
        smlabt  v4, ip, a4, v4          /* v4 += W1*col[5] (hi) */
        smlatt  v6, lr, a4, v6          /* v6 += W7*col[5] (hi) */
        ldr     a3, [a1, #(16*7)]       /* a3 = col[7] */
        smlatt  fp, ip, a4, fp          /* fp += W3*col[5] (hi) */
        smlatb  v1, lr, a3, v1          /* v1 += W7*col[7] (lo) */
        smlabb  v3, lr, a3, v3          /* v3 += W5*col[7] (lo) */
        smlatb  v5, ip, a3, v5          /* v5 += W3*col[7] (lo) */
        smlatt  v2, lr, a3, v2          /* v2 += W7*col[7] (hi) */
        smlabt  v4, lr, a3, v4          /* v4 += W5*col[7] (hi) */
        smlatt  v6, ip, a3, v6          /* v6 += W3*col[7] (hi) */
        .type   idct_col_armv5te, %function
        .func   idct_col_armv5te
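/*
 * Combine the even and odd accumulators for two columns, shift the results
 * right by COL_SHIFT, narrow them to 16 bits (the conditional orrmi restores
 * the sign bits dropped by the logical shift) and store each pair back into
 * the block.
 */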
        orrmi   a2, a2, #0xf000         /* sign-extend to 16 bits if negative */
        orr     a2, a2, ip, lsl #16     /* pack the two columns into one word */
        orrmi   a2, a2, #0xf000
        orr     a2, a2, a4, lsl #16
        str     a2, [a1, #(16*7)]
        orrmi   a2, a2, #0xf000
        orr     a2, a2, ip, lsl #16
        str     a2, [a1, #(16*1)]
        orrmi   a2, a2, #0xf000
        orr     a2, a2, a4, lsl #16
        str     a2, [a1, #(16*6)]
        orrmi   a2, a2, #0xf000
        orr     a2, a2, ip, lsl #16
        str     a2, [a1, #(16*2)]
        orrmi   a2, a2, #0xf000
        orr     a2, a2, a4, lsl #16
        str     a2, [a1, #(16*5)]
        orrmi   a2, a2, #0xf000
        orr     a2, a2, ip, lsl #16
        str     a2, [a1, #(16*3)]
        orrmi   a2, a2, #0xf000
        orr     a2, a2, a4, lsl #16
        str     a2, [a1, #(16*4)]
        .type   idct_col_put_armv5te, %function
        .func   idct_col_put_armv5te
idct_col_put_armv5te:
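/*
 * As idct_col, but the results are converted to unsigned 8-bit pixels and
 * written to the destination picture (two pixels packed per store) instead
 * of back into the block.
 */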
        orr     a2, a2, ip, lsl #8      /* pack two pixels */
        orr     a2, a3, a4, lsl #8
        rsb     v2, lr, lr, lsl #3      /* v2 = 7*lr */
        orr     a2, a2, ip, lsl #8
        orr     a2, a2, a4, lsl #8
        orr     a2, a2, ip, lsl #8
        orr     a2, a2, a4, lsl #8
        orr     a2, a2, ip, lsl #8
        orr     a2, a2, a4, lsl #8
        .type   idct_col_add_armv5te, %function
        .func   idct_col_add_armv5te
idct_col_add_armv5te:
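/*
 * As idct_col_put, but the IDCT output is added to the pixels already in
 * the destination before being written back.
 */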
        adds    v1, v1, ip, lsr #8      /* add the existing pixel */
        orr     a2, a2, v1, lsl #8      /* pack two pixels */
        rsb     v2, v1, v1, lsl #3      /* v2 = 7*v1 */
        adds    a4, a4, ip, lsr #8
        orr     a2, a3, a4, lsl #8
        adds    v3, v3, ip, lsr #8
        orr     a2, a2, v3, lsl #8
        adds    a4, a4, ip, lsr #8
        orr     a2, a3, a4, lsl #8
        adds    v3, v3, ip, lsr #8
        orr     a2, a2, v3, lsl #8
        adds    a4, a4, ip, lsr #8
        orr     a2, a3, a4, lsl #8
        adds    v3, v3, ip, lsr #8
        orr     a2, a2, v3, lsl #8
        adds    a4, a4, ip, lsr #8
        orr     a2, a3, a4, lsl #8
        .global simple_idct_armv5te
        .type   simple_idct_armv5te, %function
        .func   simple_idct_armv5te
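/*
 * In-place 2-D IDCT of the 8x8 block of 16-bit coefficients pointed to by
 * a1: all eight rows first, then the columns two at a time.
 */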
        stmfd   sp!, {v1, v2, v3, v4, v5, v6, v7, fp, lr}       /* save callee-saved registers */
        ldmfd   sp!, {v1, v2, v3, v4, v5, v6, v7, fp, pc}       /* restore and return */
        .global simple_idct_add_armv5te
        .type   simple_idct_add_armv5te, %function
        .func   simple_idct_add_armv5te
simple_idct_add_armv5te:
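/*
 * IDCT the 8x8 block and add the result to the destination picture
 * (a1 = dest, a2 = line size, a3 = block, the usual FFmpeg idct_add()
 * argument order).
 */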
        stmfd   sp!, {a1, a2, v1, v2, v3, v4, v5, v6, v7, fp, lr} /* also save dest and line size */
        bl      idct_col_add_armv5te
        bl      idct_col_add_armv5te
        bl      idct_col_add_armv5te
        bl      idct_col_add_armv5te
        ldmfd   sp!, {v1, v2, v3, v4, v5, v6, v7, fp, pc}
        .global simple_idct_put_armv5te
        .type   simple_idct_put_armv5te, %function
        .func   simple_idct_put_armv5te
simple_idct_put_armv5te:
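/*
 * IDCT the 8x8 block and overwrite the destination picture with the result
 * (a1 = dest, a2 = line size, a3 = block, as for simple_idct_add_armv5te).
 */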
        stmfd   sp!, {a1, a2, v1, v2, v3, v4, v5, v6, v7, fp, lr} /* also save dest and line size */
        bl      idct_col_put_armv5te
        bl      idct_col_put_armv5te
        bl      idct_col_put_armv5te
        bl      idct_col_put_armv5te
        ldmfd   sp!, {v1, v2, v3, v4, v5, v6, v7, fp, pc}