; Extracted from libvpx: vp8/common/arm/armv6/recon_v6.asm
; (gitweb capture header: Merge "Skip computation of distortion in
;  vp8_pick_inter_mode if active_map is used";
;  blob 99c7bcf2dd8abaaf284744bcec198c89fb88458c)
;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;
12 EXPORT |vp8_recon_b_armv6|
13 EXPORT |vp8_recon2b_armv6|
14 EXPORT |vp8_recon4b_armv6|
16 AREA |.text|, CODE, READONLY ; name this block of code
17 prd RN r0
18 dif RN r1
19 dst RN r2
20 stride RN r3
;void recon_b(unsigned char *pred_ptr, short *diff_ptr,
;             unsigned char *dst_ptr, int stride)
;
; Inputs (via the register aliases defined at the top of this file):
;   R0 (prd)    char  *pred_ptr  - 4x4 prediction block, 16-byte row pitch
;   R1 (dif)    short *diff_ptr - 4x4 residual block, 32-byte row pitch
;   R2 (dst)    char  *dst_ptr  - destination, row pitch = stride
;   R3 (stride) int    stride   - destination pitch in bytes
;
; Description:
; Loop through the block adding the Pred and Diff together. Clamp and then
; store back into the Dst.
;
; Restrictions :
; all buffers are expected to be 4 byte aligned coming in and
; going out.
;
; Clobbers: r4, r6-r9 (saved/restored via the stack), condition flags.
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
|vp8_recon_b_armv6| PROC
    stmdb   sp!, {r4 - r9, lr}      ; preserve callee-saved regs + return addr

    ; Row 0: pixels 0, 1, 2, 3
    ldr     r4, [prd], #16          ; pred bytes        3 | 2 | 1 | 0
    ldr     r6, [dif, #0]           ; diff halfwords    1 | 0
    ldr     r7, [dif, #4]           ; diff halfwords    3 | 2

    pkhbt   r8, r6, r7, lsl #16     ; repack diffs      2 | 0
    pkhtb   r9, r7, r6, asr #16     ; repack diffs      3 | 1

    uxtab16 r8, r8, r4              ; diff 2|0 + zero-extended pred bytes 2, 0
    uxtab16 r9, r9, r4, ror #8      ; diff 3|1 + zero-extended pred bytes 3, 1

    usat16  r8, #8, r8              ; clamp both lanes to [0, 255]
    usat16  r9, #8, r9
    add     dif, dif, #32           ; advance diff one row (16 shorts)
    orr     r8, r8, r9, lsl #8      ; merge back to bytes 3 | 2 | 1 | 0

    str     r8, [dst], stride       ; store row, advance dst one row

    ; Row 1: pixels 0, 1, 2, 3
    ldr     r4, [prd], #16          ; pred bytes        3 | 2 | 1 | 0
    ldr     r6, [dif, #0]           ; diff halfwords    1 | 0
    ldr     r7, [dif, #4]           ; diff halfwords    3 | 2

    pkhbt   r8, r6, r7, lsl #16     ; 2 | 0
    pkhtb   r9, r7, r6, asr #16     ; 3 | 1

    uxtab16 r8, r8, r4              ; add pred bytes 2, 0
    uxtab16 r9, r9, r4, ror #8      ; add pred bytes 3, 1

    usat16  r8, #8, r8              ; clamp to [0, 255]
    usat16  r9, #8, r9
    add     dif, dif, #32           ; advance diff one row
    orr     r8, r8, r9, lsl #8      ; 3 | 2 | 1 | 0

    str     r8, [dst], stride

    ; Row 2: pixels 0, 1, 2, 3
    ldr     r4, [prd], #16          ; pred bytes        3 | 2 | 1 | 0
    ldr     r6, [dif, #0]           ; diff halfwords    1 | 0
    ldr     r7, [dif, #4]           ; diff halfwords    3 | 2

    pkhbt   r8, r6, r7, lsl #16     ; 2 | 0
    pkhtb   r9, r7, r6, asr #16     ; 3 | 1

    uxtab16 r8, r8, r4              ; add pred bytes 2, 0
    uxtab16 r9, r9, r4, ror #8      ; add pred bytes 3, 1

    usat16  r8, #8, r8              ; clamp to [0, 255]
    usat16  r9, #8, r9
    add     dif, dif, #32           ; advance diff one row
    orr     r8, r8, r9, lsl #8      ; 3 | 2 | 1 | 0

    str     r8, [dst], stride

    ; Row 3: pixels 0, 1, 2, 3 (last row; no need to advance dif)
    ldr     r4, [prd], #16          ; pred bytes        3 | 2 | 1 | 0
    ldr     r6, [dif, #0]           ; diff halfwords    1 | 0
    ldr     r7, [dif, #4]           ; diff halfwords    3 | 2

    pkhbt   r8, r6, r7, lsl #16     ; 2 | 0
    pkhtb   r9, r7, r6, asr #16     ; 3 | 1

    uxtab16 r8, r8, r4              ; add pred bytes 2, 0
    uxtab16 r9, r9, r4, ror #8      ; add pred bytes 3, 1

    usat16  r8, #8, r8              ; clamp to [0, 255]
    usat16  r9, #8, r9
    orr     r8, r8, r9, lsl #8      ; 3 | 2 | 1 | 0

    str     r8, [dst], stride

    ldmia   sp!, {r4 - r9, pc}      ; restore and return

    ENDP    ; |vp8_recon_b_armv6|
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
; Reconstruct a 16-pixel-wide, 4-row region: dst = clamp8(pred + diff).
;
;   R0 char  *pred_ptr - 16 pixels per row, 16-byte row pitch
;   R1 short *dif_ptr - 16 residuals per row, 32-byte row pitch
;   R2 char  *dst_ptr - destination, row pitch = stride
;   R3 int    stride
;
; All buffers must be 4-byte aligned. Clobbers r4, r6-r9 (saved/restored).
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
|vp8_recon4b_armv6| PROC
    stmdb   sp!, {r4 - r9, lr}      ; preserve callee-saved regs + return addr

    mov     lr, #4                  ; 4 rows to process

recon4b_loop
    ; pixels 0, 1, 2, 3
    ldr     r4, [prd], #4           ; pred bytes        3 | 2 | 1 | 0
    ldr     r6, [dif, #0]           ; diff halfwords    1 | 0
    ldr     r7, [dif, #4]           ; diff halfwords    3 | 2

    pkhbt   r8, r6, r7, lsl #16     ; 2 | 0
    pkhtb   r9, r7, r6, asr #16     ; 3 | 1

    uxtab16 r8, r8, r4              ; add pred bytes 2, 0
    uxtab16 r9, r9, r4, ror #8      ; add pred bytes 3, 1

    usat16  r8, #8, r8              ; clamp to [0, 255]
    usat16  r9, #8, r9
    orr     r8, r8, r9, lsl #8      ; 3 | 2 | 1 | 0

    str     r8, [dst]

    ; pixels 4, 5, 6, 7
    ldr     r4, [prd], #4
    ldr     r6, [dif, #8]
    ldr     r7, [dif, #12]

    pkhbt   r8, r6, r7, lsl #16
    pkhtb   r9, r7, r6, asr #16

    uxtab16 r8, r8, r4
    uxtab16 r9, r9, r4, ror #8
    usat16  r8, #8, r8
    usat16  r9, #8, r9
    orr     r8, r8, r9, lsl #8

    str     r8, [dst, #4]

    ; pixels 8, 9, 10, 11
    ldr     r4, [prd], #4
    ldr     r6, [dif, #16]
    ldr     r7, [dif, #20]

    pkhbt   r8, r6, r7, lsl #16
    pkhtb   r9, r7, r6, asr #16

    uxtab16 r8, r8, r4
    uxtab16 r9, r9, r4, ror #8
    usat16  r8, #8, r8
    usat16  r9, #8, r9
    orr     r8, r8, r9, lsl #8

    str     r8, [dst, #8]

    ; pixels 12, 13, 14, 15
    ldr     r4, [prd], #4
    ldr     r6, [dif, #24]
    ldr     r7, [dif, #28]

    pkhbt   r8, r6, r7, lsl #16
    pkhtb   r9, r7, r6, asr #16

    uxtab16 r8, r8, r4
    uxtab16 r9, r9, r4, ror #8
    usat16  r8, #8, r8
    usat16  r9, #8, r9
    orr     r8, r8, r9, lsl #8

    str     r8, [dst, #12]

    add     dst, dst, stride        ; next destination row
    add     dif, dif, #32           ; next diff row (16 shorts)

    subs    lr, lr, #1
    bne     recon4b_loop

    ldmia   sp!, {r4 - r9, pc}      ; restore and return

    ENDP    ; |vp8_recon4b_armv6|
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
; Reconstruct an 8-pixel-wide, 4-row region: dst = clamp8(pred + diff).
;
;   R0 char  *pred_ptr - 8 pixels per row, 8-byte row pitch
;   R1 short *dif_ptr - 8 residuals per row, 16-byte row pitch
;   R2 char  *dst_ptr - destination, row pitch = stride
;   R3 int    stride
;
; All buffers must be 4-byte aligned. Clobbers r4, r6-r9 (saved/restored).
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
|vp8_recon2b_armv6| PROC
    stmdb   sp!, {r4 - r9, lr}      ; preserve callee-saved regs + return addr

    mov     lr, #4                  ; 4 rows to process

recon2b_loop
    ; pixels 0, 1, 2, 3
    ldr     r4, [prd], #4           ; pred bytes        3 | 2 | 1 | 0
    ldr     r6, [dif, #0]           ; diff halfwords    1 | 0
    ldr     r7, [dif, #4]           ; diff halfwords    3 | 2

    pkhbt   r8, r6, r7, lsl #16     ; 2 | 0
    pkhtb   r9, r7, r6, asr #16     ; 3 | 1

    uxtab16 r8, r8, r4              ; add pred bytes 2, 0
    uxtab16 r9, r9, r4, ror #8      ; add pred bytes 3, 1
    usat16  r8, #8, r8              ; clamp to [0, 255]
    usat16  r9, #8, r9
    orr     r8, r8, r9, lsl #8      ; 3 | 2 | 1 | 0

    str     r8, [dst]

    ; pixels 4, 5, 6, 7
    ldr     r4, [prd], #4
    ldr     r6, [dif, #8]
    ldr     r7, [dif, #12]

    pkhbt   r8, r6, r7, lsl #16
    pkhtb   r9, r7, r6, asr #16

    uxtab16 r8, r8, r4
    uxtab16 r9, r9, r4, ror #8
    usat16  r8, #8, r8
    usat16  r9, #8, r9
    orr     r8, r8, r9, lsl #8

    str     r8, [dst, #4]

    add     dst, dst, stride        ; next destination row
    add     dif, dif, #16           ; next diff row (8 shorts)

    subs    lr, lr, #1
    bne     recon2b_loop

    ldmia   sp!, {r4 - r9, pc}      ; restore and return

    ENDP    ; |vp8_recon2b_armv6|