; Origin: libvpx.git / vp8 / common / arm / armv6 / recon_v6.asm
; (blob 085ff80c901d53cd5476e1107abf95a5126a8208, commit "Initial WebM release")
;
;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license and patent
;  grant that can be found in the LICENSE file in the root of the source
;  tree. All contributing project authors may be found in the AUTHORS
;  file in the root of the source tree.
;
; Entry points made visible to the linker.
    EXPORT  |vp8_recon_b_armv6|
    EXPORT  |vp8_recon2b_armv6|
    EXPORT  |vp8_recon4b_armv6|

    AREA    |.text|, CODE, READONLY  ; name this block of code

; Register aliases for the four arguments shared by every routine in this
; file. Each routine takes (pred_ptr, diff_ptr, dst_ptr, stride) in r0-r3.
prd     RN  r0                       ; predictor bytes (read pointer)
dif     RN  r1                       ; 16-bit differences (read pointer)
dst     RN  r2                       ; reconstructed bytes (write pointer)
stride  RN  r3                       ; byte step between destination rows
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
; void vp8_recon_b_armv6(unsigned char *pred_ptr, short *diff_ptr,
;                        unsigned char *dst_ptr, int stride)
;   R0 (prd)     unsigned char *pred_ptr
;   R1 (dif)     short         *diff_ptr
;   R2 (dst)     unsigned char *dst_ptr
;   R3 (stride)  int            stride
;
; Description:
;   Reconstructs four rows of four pixels. For every row the four predictor
;   bytes are added to the four 16-bit differences, each sum is clamped to
;   0..255, and the four result bytes are stored to dst as a single word.
;
;   Pointer steps per row, as coded below:
;     prd += 16 bytes,  dif += 32 bytes (16 shorts),  dst += stride bytes.
;
; Restrictions:
;   All buffers are expected to be 4 byte aligned coming in and going out
;   (every memory access here is a word load or store).
;   The lane comments (3 | 2 | 1 | 0, byte 0 in bits 7:0) assume the
;   little-endian byte order used by the original annotations.
;
; Registers:
;   prd, dif and dst are advanced. r4, r6 - r9 are scratch and are
;   saved/restored. r5 is never written, so it is not saved.
;
; Lane notation: pN = predictor byte N, dN = difference N, oN = output byte N.
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
|vp8_recon_b_armv6| PROC
    stmdb   sp!, {r4, r6 - r9, lr}

    ; row 0: pixels 0, 1, 2, 3
    ldr     r4, [prd], #16          ; p3 | p2 | p1 | p0 ; prd -> next row
    ldr     r6, [dif, #0]           ;      d1 | d0
    ldr     r7, [dif, #4]           ;      d3 | d2

    pkhbt   r8, r6, r7, lsl #16     ;      d2 | d0   (even lanes)
    pkhtb   r9, r7, r6, asr #16     ;      d3 | d1   (odd lanes)

    uxtab16 r8, r8, r4              ; d2 | d0  +  p3 | p2 | p1 | p0 -> d2+p2 | d0+p0
    uxtab16 r9, r9, r4, ror #8      ; d3 | d1  +  p0 | p3 | p2 | p1 -> d3+p3 | d1+p1

    usat16  r8, #8, r8              ; clamp each halfword to 0..255
    usat16  r9, #8, r9
    add     dif, dif, #32           ; dif -> next row (16 shorts further on)
    orr     r8, r8, r9, lsl #8      ; o3 | o2 | o1 | o0

    str     r8, [dst], stride       ; store the row ; dst -> next row

    ; row 1: pixels 0, 1, 2, 3
    ldr     r4, [prd], #16          ; p3 | p2 | p1 | p0
    ldr     r6, [dif, #0]           ;      d1 | d0
    ldr     r7, [dif, #4]           ;      d3 | d2

    pkhbt   r8, r6, r7, lsl #16     ;      d2 | d0
    pkhtb   r9, r7, r6, asr #16     ;      d3 | d1

    uxtab16 r8, r8, r4              ; d2+p2 | d0+p0
    uxtab16 r9, r9, r4, ror #8      ; d3+p3 | d1+p1

    usat16  r8, #8, r8
    usat16  r9, #8, r9
    add     dif, dif, #32
    orr     r8, r8, r9, lsl #8      ; o3 | o2 | o1 | o0

    str     r8, [dst], stride

    ; row 2: pixels 0, 1, 2, 3
    ldr     r4, [prd], #16          ; p3 | p2 | p1 | p0
    ldr     r6, [dif, #0]           ;      d1 | d0
    ldr     r7, [dif, #4]           ;      d3 | d2

    pkhbt   r8, r6, r7, lsl #16     ;      d2 | d0
    pkhtb   r9, r7, r6, asr #16     ;      d3 | d1

    uxtab16 r8, r8, r4              ; d2+p2 | d0+p0
    uxtab16 r9, r9, r4, ror #8      ; d3+p3 | d1+p1

    usat16  r8, #8, r8
    usat16  r9, #8, r9
    add     dif, dif, #32
    orr     r8, r8, r9, lsl #8      ; o3 | o2 | o1 | o0

    str     r8, [dst], stride

    ; row 3: pixels 0, 1, 2, 3 (last row, so dif is not advanced again)
    ldr     r4, [prd], #16          ; p3 | p2 | p1 | p0
    ldr     r6, [dif, #0]           ;      d1 | d0
    ldr     r7, [dif, #4]           ;      d3 | d2

    pkhbt   r8, r6, r7, lsl #16     ;      d2 | d0
    pkhtb   r9, r7, r6, asr #16     ;      d3 | d1

    uxtab16 r8, r8, r4              ; d2+p2 | d0+p0
    uxtab16 r9, r9, r4, ror #8      ; d3+p3 | d1+p1

    usat16  r8, #8, r8
    usat16  r9, #8, r9
    orr     r8, r8, r9, lsl #8      ; o3 | o2 | o1 | o0

    str     r8, [dst], stride

    ldmia   sp!, {r4, r6 - r9, pc}  ; restore scratch registers and return

    ENDP    ; |vp8_recon_b_armv6|
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
; void vp8_recon4b_armv6(unsigned char *pred_ptr, short *diff_ptr,
;                        unsigned char *dst_ptr, int stride)
;   R0 (prd)     unsigned char *pred_ptr
;   R1 (dif)     short         *diff_ptr
;   R2 (dst)     unsigned char *dst_ptr
;   R3 (stride)  int            stride
;
; Description:
;   Reconstructs four rows of sixteen pixels. Each loop pass handles one row
;   as four groups of four pixels: predictor bytes plus 16-bit differences,
;   each sum clamped to 0..255, packed four to a word and stored to dst.
;
;   Per row, as coded below:
;     prd advances 16 bytes (4 per group), dif advances 32 bytes (16 shorts),
;     dst advances by stride bytes after 16 bytes have been written.
;
; Restrictions:
;   Every access is a word load or store, so buffers are expected to be
;   4 byte aligned. Lane comments assume little-endian byte order.
;
; Registers:
;   prd, dif and dst are advanced; lr is the row counter. r4, r6 - r9 and lr
;   are saved/restored. r5 is never written, so it is not saved.
;
; Lane notation: pN = predictor byte N, dN = difference N, oN = output byte N.
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
|vp8_recon4b_armv6| PROC
    stmdb   sp!, {r4, r6 - r9, lr}

    mov     lr, #4                  ; lr = rows still to do

recon4b_loop
    ; pixels 0, 1, 2, 3
    ldr     r4, [prd], #4           ; p3 | p2 | p1 | p0
    ldr     r6, [dif, #0]           ;      d1 | d0
    ldr     r7, [dif, #4]           ;      d3 | d2

    pkhbt   r8, r6, r7, lsl #16     ;      d2 | d0   (even lanes)
    pkhtb   r9, r7, r6, asr #16     ;      d3 | d1   (odd lanes)

    uxtab16 r8, r8, r4              ; d2+p2 | d0+p0
    uxtab16 r9, r9, r4, ror #8      ; d3+p3 | d1+p1

    usat16  r8, #8, r8              ; clamp each halfword to 0..255
    usat16  r9, #8, r9
    orr     r8, r8, r9, lsl #8      ; o3 | o2 | o1 | o0

    str     r8, [dst]

    ; pixels 4, 5, 6, 7
    ldr     r4, [prd], #4
    ldr     r6, [dif, #8]
    ldr     r7, [dif, #12]

    pkhbt   r8, r6, r7, lsl #16
    pkhtb   r9, r7, r6, asr #16

    uxtab16 r8, r8, r4
    uxtab16 r9, r9, r4, ror #8
    usat16  r8, #8, r8
    usat16  r9, #8, r9
    orr     r8, r8, r9, lsl #8

    str     r8, [dst, #4]

    ; pixels 8, 9, 10, 11
    ldr     r4, [prd], #4
    ldr     r6, [dif, #16]
    ldr     r7, [dif, #20]

    pkhbt   r8, r6, r7, lsl #16
    pkhtb   r9, r7, r6, asr #16

    uxtab16 r8, r8, r4
    uxtab16 r9, r9, r4, ror #8
    usat16  r8, #8, r8
    usat16  r9, #8, r9
    orr     r8, r8, r9, lsl #8

    str     r8, [dst, #8]

    ; pixels 12, 13, 14, 15
    ldr     r4, [prd], #4
    ldr     r6, [dif, #24]
    ldr     r7, [dif, #28]

    pkhbt   r8, r6, r7, lsl #16
    pkhtb   r9, r7, r6, asr #16

    uxtab16 r8, r8, r4
    uxtab16 r9, r9, r4, ror #8
    usat16  r8, #8, r8
    usat16  r9, #8, r9
    orr     r8, r8, r9, lsl #8

    str     r8, [dst, #12]

    add     dst, dst, stride        ; dst -> next row
    add     dif, dif, #32           ; dif -> next row (16 shorts consumed)

    subs    lr, lr, #1
    bne     recon4b_loop

    ldmia   sp!, {r4, r6 - r9, pc}  ; restore scratch registers and return

    ENDP    ; |vp8_recon4b_armv6|
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
; void vp8_recon2b_armv6(unsigned char *pred_ptr, short *diff_ptr,
;                        unsigned char *dst_ptr, int stride)
;   R0 (prd)     unsigned char *pred_ptr
;   R1 (dif)     short         *diff_ptr
;   R2 (dst)     unsigned char *dst_ptr
;   R3 (stride)  int            stride
;
; Description:
;   Reconstructs four rows of eight pixels. Each loop pass handles one row
;   as two groups of four pixels: predictor bytes plus 16-bit differences,
;   each sum clamped to 0..255, packed four to a word and stored to dst.
;
;   Per row, as coded below:
;     prd advances 8 bytes (4 per group), dif advances 16 bytes (8 shorts),
;     dst advances by stride bytes after 8 bytes have been written.
;
; Restrictions:
;   Every access is a word load or store, so buffers are expected to be
;   4 byte aligned. Lane comments assume little-endian byte order.
;
; Registers:
;   prd, dif and dst are advanced; lr is the row counter. r4, r6 - r9 and lr
;   are saved/restored. r5 is never written, so it is not saved.
;
; Lane notation: pN = predictor byte N, dN = difference N, oN = output byte N.
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
|vp8_recon2b_armv6| PROC
    stmdb   sp!, {r4, r6 - r9, lr}

    mov     lr, #4                  ; lr = rows still to do

recon2b_loop
    ; pixels 0, 1, 2, 3
    ldr     r4, [prd], #4           ; p3 | p2 | p1 | p0
    ldr     r6, [dif, #0]           ;      d1 | d0
    ldr     r7, [dif, #4]           ;      d3 | d2

    pkhbt   r8, r6, r7, lsl #16     ;      d2 | d0   (even lanes)
    pkhtb   r9, r7, r6, asr #16     ;      d3 | d1   (odd lanes)

    uxtab16 r8, r8, r4              ; d2+p2 | d0+p0
    uxtab16 r9, r9, r4, ror #8      ; d3+p3 | d1+p1
    usat16  r8, #8, r8              ; clamp each halfword to 0..255
    usat16  r9, #8, r9
    orr     r8, r8, r9, lsl #8      ; o3 | o2 | o1 | o0

    str     r8, [dst]

    ; pixels 4, 5, 6, 7
    ldr     r4, [prd], #4
    ldr     r6, [dif, #8]
    ldr     r7, [dif, #12]

    pkhbt   r8, r6, r7, lsl #16
    pkhtb   r9, r7, r6, asr #16

    uxtab16 r8, r8, r4
    uxtab16 r9, r9, r4, ror #8
    usat16  r8, #8, r8
    usat16  r9, #8, r9
    orr     r8, r8, r9, lsl #8

    str     r8, [dst, #4]

    add     dst, dst, stride        ; dst -> next row
    add     dif, dif, #16           ; dif -> next row (8 shorts consumed)

    subs    lr, lr, #1
    bne     recon2b_loop

    ldmia   sp!, {r4, r6 - r9, pc}  ; restore scratch registers and return

    ENDP    ; |vp8_recon2b_armv6|