2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
4 ; Use of this source code is governed by a BSD-style license
5 ; that can be found in the LICENSE file in the root of the source
6 ; tree. An additional intellectual property rights grant can be found
7 ; in the file PATENTS. All contributing project authors may
8 ; be found in the AUTHORS file in the root of the source tree.
; Entry points made visible to the linker.
    EXPORT      |vp8_short_inv_walsh4x4_v6|
    EXPORT      |vp8_short_inv_walsh4x4_1_v6|

; Everything below is assembled into a read-only code section.
    AREA        |.text|, CODE, READONLY     ; name this block of code
;-----------------------------------------------------------------------
;short vp8_short_inv_walsh4x4_v6(short *input, short *output)
;
; 4x4 inverse Walsh transform on sixteen 16-bit values, processed two
; halfwords per register with the saturating dual-16-bit instructions.
;
; In:    r0 = input  (16 halfwords, read as 8 words)
;        r1 = output (16 halfwords, written as 8 words)
; Out:   output[i] = (second-pass result + 3) >> 3 for i = 0..15.
;        r0 is not set to a return value (it is left pointing into input).
; Saves: r4-r11, lr pushed on entry and restored on exit.
; Clobb: r0-r3, r12
;
; Lane notation in the comments: [hi | lo] are the coefficient indices
; held in the top / bottom halfword of the register.
;-----------------------------------------------------------------------
|vp8_short_inv_walsh4x4_v6| PROC

    stmdb       sp!, {r4 - r11, lr}

    ; Load the whole 4x4 block: one row per register pair.
    ldr         r2, [r0], #4         ; [1  |  0]
    ldr         r3, [r0], #4         ; [3  |  2]
    ldr         r4, [r0], #4         ; [5  |  4]
    ldr         r5, [r0], #4         ; [7  |  6]
    ldr         r6, [r0], #4         ; [9  |  8]
    ldr         r7, [r0], #4         ; [11 | 10]
    ldr         r8, [r0], #4         ; [13 | 12]
    ldr         r9, [r0]             ; [15 | 14]

    ; First pass (down the columns), columns 0 and 1.
    qadd16      r10, r2, r8          ; a1 [1+13  |  0+12]
    qadd16      r11, r4, r6          ; b1 [5+9   |  4+8]
    qsub16      r12, r4, r6          ; c1 [5-9   |  4-8]
    qsub16      lr, r2, r8           ; d1 [1-13  |  0-12]

    qadd16      r2, r10, r11         ; a1 + b1 [1  |  0]
    qadd16      r4, r12, lr          ; c1 + d1 [5  |  4]
    qsub16      r6, r10, r11         ; a1 - b1 [9  |  8]
    qsub16      r8, lr, r12          ; d1 - c1 [13 | 12]

    ; First pass, columns 2 and 3.
    qadd16      r10, r3, r9          ; a1 [3+15  |  2+14]
    qadd16      r11, r5, r7          ; b1 [7+11  |  6+10]
    qsub16      r12, r5, r7          ; c1 [7-11  |  6-10]
    qsub16      lr, r3, r9           ; d1 [3-15  |  2-14]

    qadd16      r3, r10, r11         ; a1 + b1 [3  |  2]
    qadd16      r5, r12, lr          ; c1 + d1 [7  |  6]
    qsub16      r7, r10, r11         ; a1 - b1 [11 | 10]
    qsub16      r9, lr, r12          ; d1 - c1 [15 | 14]

    ; first transform complete

    ; Second pass (along the rows), rows 0 and 1.
    qsubaddx    r10, r2, r3          ; [c1|a1] [1-2   |   0+3]
    qaddsubx    r11, r2, r3          ; [b1|d1] [1+2   |   0-3]
    qsubaddx    r12, r4, r5          ; [c1|a1] [5-6   |   4+7]
    qaddsubx    lr, r4, r5           ; [b1|d1] [5+6   |   4-7]

    qaddsubx    r2, r10, r11         ; [b2|c2] [c1+d1 | a1-b1]
    qaddsubx    r3, r11, r10         ; [a2|d2] [b1+a1 | d1-c1]
    ; r10 is dead from here on as a temporary: reuse it for the packed
    ; rounding term [3 | 3]. It stays live until the last qadd16 below,
    ; so the second half must not use r10 as scratch.
    ldr         r10, c0x00030003
    qaddsubx    r4, r12, lr          ; [b2|c2] [c1+d1 | a1-b1]
    qaddsubx    r5, lr, r12          ; [a2|d2] [b1+a1 | d1-c1]

    qadd16      r2, r2, r10          ; [b2+3|c2+3]
    qadd16      r3, r3, r10          ; [a2+3|d2+3]
    qadd16      r4, r4, r10          ; [b2+3|c2+3]
    qadd16      r5, r5, r10          ; [a2+3|d2+3]

    ; >>3 each lane and re-pack into output order. A lane sitting in the
    ; bottom halfword is first moved to the top (lsl #16) so that asr
    ; sign-extends it correctly.
    asr         r12, r2, #3          ; [1  |  x]
    pkhtb       r12, r12, r3, asr #19; [1  |  0]
    lsl         lr, r3, #16          ; [~3 |  x]
    lsl         r2, r2, #16          ; [~2 |  x]
    asr         lr, lr, #3           ; [3  |  x]
    pkhtb       lr, lr, r2, asr #19  ; [3  |  2]

    asr         r2, r4, #3           ; [5  |  x]
    pkhtb       r2, r2, r5, asr #19  ; [5  |  4]
    lsl         r3, r5, #16          ; [~7 |  x]
    lsl         r4, r4, #16          ; [~6 |  x]
    asr         r3, r3, #3           ; [7  |  x]
    pkhtb       r3, r3, r4, asr #19  ; [7  |  6]

    ; Store rows 0 and 1 now: r2/r3 are reused as temporaries below.
    str         r12, [r1], #4        ; output [1  |  0]
    str         lr, [r1], #4         ; output [3  |  2]
    str         r2, [r1], #4         ; output [5  |  4]
    str         r3, [r1], #4         ; output [7  |  6]

    ; Second pass, rows 2 and 3.
    qsubaddx    r2, r6, r7           ; [c1|a1] [9-10  |  8+11]
    qaddsubx    r3, r6, r7           ; [b1|d1] [9+10  |  8-11]
    qsubaddx    r4, r8, r9           ; [c1|a1] [13-14 | 12+15]
    qaddsubx    r5, r8, r9           ; [b1|d1] [13+14 | 12-15]

    qaddsubx    r6, r2, r3           ; [b2|c2] [c1+d1 | a1-b1]
    qaddsubx    r7, r3, r2           ; [a2|d2] [b1+a1 | d1-c1]
    qaddsubx    r8, r4, r5           ; [b2|c2] [c1+d1 | a1-b1]
    qaddsubx    r9, r5, r4           ; [a2|d2] [b1+a1 | d1-c1]

    qadd16      r6, r6, r10          ; [b2+3|c2+3]
    qadd16      r7, r7, r10          ; [a2+3|d2+3]
    qadd16      r8, r8, r10          ; [b2+3|c2+3]
    qadd16      r9, r9, r10          ; [a2+3|d2+3]

    asr         r2, r6, #3           ; [9  |  x]
    pkhtb       r2, r2, r7, asr #19  ; [9  |  8]
    lsl         r3, r7, #16          ; [~11|  x]
    lsl         r4, r6, #16          ; [~10|  x]
    asr         r3, r3, #3           ; [11 |  x]
    pkhtb       r3, r3, r4, asr #19  ; [11 | 10]

    asr         r4, r8, #3           ; [13 |  x]
    pkhtb       r4, r4, r9, asr #19  ; [13 | 12]
    lsl         r5, r9, #16          ; [~15|  x]
    lsl         r6, r8, #16          ; [~14|  x]
    asr         r5, r5, #3           ; [15 |  x]
    pkhtb       r5, r5, r6, asr #19  ; [15 | 14]

    ; Store rows 2 and 3.
    str         r2, [r1], #4         ; output [9  |  8]
    str         r3, [r1], #4         ; output [11 | 10]
    str         r4, [r1], #4         ; output [13 | 12]
    str         r5, [r1]             ; output [15 | 14]

    ldmia       sp!, {r4 - r11, pc}  ; restore and return
    ENDP        ; |vp8_short_inv_walsh4x4_v6|
;-----------------------------------------------------------------------
;short vp8_short_inv_walsh4x4_1_v6(short *input, short *output)
;
; Single-coefficient variant: only input[0] is read, and all sixteen
; output halfwords are set to (input[0] + 3) >> 3.
;
; In:    r0 = input  (only the first halfword is read)
;        r1 = output (16 halfwords, written as 8 words)
; Out:   r0 is not set to a return value.
; Clobb: r1, r2
;-----------------------------------------------------------------------
|vp8_short_inv_walsh4x4_1_v6| PROC

    ldrsh       r2, [r0]             ; [0], sign-extended
    add         r2, r2, #3           ; [0] + 3
    asr         r2, r2, #3           ; a1 ([0]+3) >> 3
    lsl         r2, r2, #16          ; [a1 | x]  (also drops the sign bits)
    orr         r2, r2, r2, lsr #16  ; [a1 | a1]

    ; Replicate the packed pair across the whole 4x4 output block.
    str         r2, [r1], #4         ; output [1  |  0]
    str         r2, [r1], #4         ; output [3  |  2]
    str         r2, [r1], #4         ; output [5  |  4]
    str         r2, [r1], #4         ; output [7  |  6]
    str         r2, [r1], #4         ; output [9  |  8]
    str         r2, [r1], #4         ; output [11 | 10]
    str         r2, [r1], #4         ; output [13 | 12]
    str         r2, [r1]             ; output [15 | 14]

    bx          lr                   ; leaf function: nothing to restore
    ENDP        ; |vp8_short_inv_walsh4x4_1_v6|
; Literal word holding the value 3 in both of its 16-bit halves.
c0x00030003     DCD     0x00030003