Initial WebM release
[libvpx.git] / vp8 / decoder / arm / detokenizearm_v6.asm
blob4d87ee5bdf9c485c070753a55783c151b22b2581
;
;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license and patent
;  grant that can be found in the LICENSE file in the root of the source
;  tree. All contributing project authors may be found in the AUTHORS
;  file in the root of the source tree.
;
11 EXPORT |vp8_decode_mb_tokens_v5|
13 AREA |.text|, CODE, READONLY ; name this block of code
15 INCLUDE vpx_asm_offsets.asm
17 l_qcoeff EQU 0
18 l_i EQU 4
19 l_type EQU 8
20 l_stop EQU 12
21 l_c EQU 16
22 l_l_ptr EQU 20
23 l_a_ptr EQU 24
24 l_bc EQU 28
25 l_coef_ptr EQU 32
26 l_stacksize EQU 64
29 ;; constant offsets -- these should be created at build time
30 c_onyxblock2left_offset EQU 25
31 c_onyxblock2above_offset EQU 50
32 c_entropy_nodes EQU 11
33 c_dct_eob_token EQU 11
;-----------------------------------------------------------------------
; vp8_decode_mb_tokens_v5
;
; Decodes the coefficient tokens of one macroblock with the boolean
; decoder. For every block it writes the decoded coefficients through
; the qcoeff pointer, stores the end-of-block position in detoken->eob[i]
; and updates the "above"/"left" entropy-context entries.
;
; In:   r0 = DETOK *detoken
;       r1 = type of the first pass.
;            1     : decode block 24 first, then restart at block 0
;                    with type 0.
;            other : start at block 0 with that type.
;            In both cases blocks 16..23 are decoded with type 2.
; Out:  r0 = 0
;       count / value / pos / range are written back to the bool
;       decoder referenced by detoken->current_bc.
;
; Register roles (r4-r11 and lr are saved on entry, restored on exit):
;       r4  = bool decoder value
;       r5  = bool decoder count
;       r6  = bool decoder range
;       r8  = address of the next input byte (buffer + pos)
;       r9  = detoken
;       r7  = block type on entry to BLOCK_LOOP, coefficient index c in
;             COEFF_LOOP (scratch while extra bits are read)
;       r11 = block index i on entry to BLOCK_LOOP (scratch afterwards,
;             reloaded from the stack at END_OF_BLOCK)
;       r1  = token tree index t
;       lr  = decoded value v once a token has been found
;       r0-r3, r10, r12 = scratch
;
; Stack: l_stacksize bytes of locals, addressed with the l_* offsets.
;-----------------------------------------------------------------------
|vp8_decode_mb_tokens_v5| PROC
    stmdb   sp!, {r4 - r11, lr}
    sub     sp, sp, #l_stacksize
    mov     r7, r1                      ; r7 = type
    mov     r9, r0                      ; DETOK *detoken

    ldr     r1, [r9, #detok_current_bc]
    ldr     r0, [r9, #detok_qcoeff_start_ptr]
    mov     r11, #0                     ; i = 0
    mov     r3, #0x10                   ; stop = 16

    cmp     r7, #1                      ; type 1 starts with block 24
    addeq   r11, r11, #24               ; i = 24
    addeq   r3, r3, #8                  ; stop = 24
    addeq   r0, r0, #3, 24              ; qcoeff += 0x300 (24 blocks of 0x20 bytes)

    str     r0, [sp, #l_qcoeff]
    str     r11, [sp, #l_i]
    str     r7, [sp, #l_type]
    str     r3, [sp, #l_stop]
    str     r1, [sp, #l_bc]

    add     lr, r9, r7, lsl #2          ; lr = detoken + type * 4

    ldr     r2, [r1, #bool_decoder_buffer]
    ldr     r3, [r1, #bool_decoder_pos]

    ldr     r10, [lr, #detok_coef_probs]    ; r10 = detoken->coef_probs[type]
    ldr     r5, [r1, #bool_decoder_count]
    ldr     r6, [r1, #bool_decoder_range]
    ldr     r4, [r1, #bool_decoder_value]
    add     r8, r2, r3                  ; r8 = buffer + pos

    str     r10, [sp, #l_coef_ptr]

    ;align 4
    ; Per-block setup. Expects r7 = type, r11 = i, r10 = probability table.
BLOCK_LOOP
    ldr     r3, [r9, #detok_ptr_onyxblock2context_leftabove]
    ldr     r2, [r9, #DETOK_A]
    ldr     r1, [r9, #DETOK_L]
    ldrb    r12, [r3, +r11]             ; detoken->ptr_onyxblock2context_leftabove[i]

    cmp     r7, #0                      ; check type
    moveq   r7, #1                      ; c = !type
    movne   r7, #0

    ldr     r0, [r2, +r12, lsl #2]      ; a
    add     r1, r1, r12, lsl #4         ; l
    add     r3, r3, r11

    ldrb    r2, [r3, #c_onyxblock2above_offset]
    ldrb    r3, [r3, #c_onyxblock2left_offset]
    mov     lr, #c_entropy_nodes
    ;; ;++

    ldr     r2, [r0, +r2, lsl #2]!      ; r2 = above entry, r0 = its address
    add     r3, r1, r3, lsl #2          ; r3 = address of the left entry
    str     r3, [sp, #l_l_ptr]
    ldr     r3, [r3]

    cmp     r2, #0                      ; context = (above != 0) + (left != 0)
    movne   r2, #1
    cmp     r3, #0
    addne   r2, r2, #1

    str     r0, [sp, #l_a_ptr]
    smlabb  r0, r2, lr, r10             ; prob = coef_ptr + context * c_entropy_nodes
    mov     r1, #0                      ; t = 0
    str     r7, [sp, #l_c]

    ;align 4
    ; Per-coefficient setup. Expects r7 = c, r0 = probability base for
    ; the current context, r1 = tree index to start from.
COEFF_LOOP
    ldr     r3, [r9, #detok_ptr_onyx_coef_bands_x]
    ldr     lr, [r9, #detok_onyx_coef_tree_ptr]

    ;;the following two lines are used if onyx_coef_bands_x is UINT16
    ;; add r3, r3, r7, lsl #1
    ;; ldrh r3, [r3]

    ;;the following line is used if onyx_coef_bands_x is UINT8
    ldrb    r3, [r7, +r3]               ; onyx_coef_bands_x[c]

    ;; ;++
    ;; pld [r8]

    add     r0, r0, r3                  ; prob += band offset

    ;align 4
    ; Walks the token tree one boolean decision per iteration until a
    ; leaf (t <= 0) is reached.
get_token_loop
    ldrb    r2, [r0, +r1, asr #1]       ; probability = prob[t >> 1]
    mov     r3, r6, lsl #8
    sub     r3, r3, #256                ; split = 1 + (((range-1) * probability) >> 8)
    mov     r10, #1

    smlawb  r2, r3, r2, r10
    ldrb    r12, [r8]                   ; load cx data byte in stall slot

    subs    r3, r4, r2, lsl #24         ; x = value-(split<<24)
    addhs   r1, r1, #1                  ; t += 1
    movhs   r4, r3                      ; update value
    subhs   r2, r6, r2                  ; range = range - split
    movlo   r6, r2                      ; range = split

    ;;; ldrsbhs r1, [r1, +lr]
    ldrsb   r1, [r1, +lr]               ; t = tree[t]

    ;; use branch for short pipelines ???
    ;; cmp r2, #0x80
    ;; bcs |$LN22@decode_mb_to|

    clz     r3, r2                      ; shift = clz(range) - 24
    sub     r3, r3, #24
    subs    r5, r5, r3                  ; count -= shift
    mov     r6, r2, lsl r3              ; range <<= shift
    mov     r4, r4, lsl r3              ; value <<= shift

    ;; use branch for short pipelines ???
    ;; bgt |$LN22@decode_mb_to|

    addle   r5, r5, #8                  ; count <= 0: take in the byte loaded above
    rsble   r3, r5, #8
    addle   r8, r8, #1
    orrle   r4, r4, r12, lsl r3

    ;;|$LN22@decode_mb_to|

    cmp     r1, #0
    bgt     get_token_loop

    cmn     r1, #c_dct_eob_token        ; if(t == -DCT_EOB_TOKEN)
    beq     END_OF_BLOCK

    rsb     lr, r1, #0                  ; v = -t;

    cmp     lr, #4                      ; if(v > FOUR_TOKEN)
    ble     SKIP_EXTRABITS

    ldr     r3, [r9, #detok_teb_base_ptr]
    mov     r11, #1
    add     r7, r3, lr, lsl #4          ; teb_ptr = teb_base_ptr + v * 16 bytes

    ldrsh   lr, [r7, #tokenextrabits_min_val]   ; v = teb_ptr->min_val
    ldrsh   r0, [r7, #tokenextrabits_length]    ; bits_count = teb_ptr->Length

    ; Reads one extra bit per iteration, from bit bits_count down to bit 0.
extrabits_loop
    add     r3, r0, r7

    ldrb    r2, [r3, #4]                ; probability byte at teb_ptr + 4 + bits_count
    mov     r3, r6, lsl #8
    sub     r3, r3, #256                ; split = 1 + (((range-1) * probability) >> 8)
    mov     r10, #1

    smlawb  r2, r3, r2, r10
    ldrb    r12, [r8]

    subs    r10, r4, r2, lsl #24        ; x = value-(split<<24)
    movhs   r4, r10                     ; update value
    subhs   r2, r6, r2                  ; range = range - split
    addhs   lr, lr, r11, lsl r0         ; v += ((UINT16)1<<bits_count)
    movlo   r6, r2                      ; range = split

    ;; use branch for short pipelines ???
    ;; cmp r2, #0x80
    ;; bcs |$LN10@decode_mb_to|

    clz     r3, r2
    sub     r3, r3, #24
    subs    r5, r5, r3
    mov     r6, r2, lsl r3              ; range
    mov     r4, r4, lsl r3              ; value

    addle   r5, r5, #8
    addle   r8, r8, #1
    rsble   r3, r5, #8
    orrle   r4, r4, r12, lsl r3

    ;;|$LN10@decode_mb_to|
    subs    r0, r0, #1
    bpl     extrabits_loop

SKIP_EXTRABITS
    ldr     r11, [sp, #l_qcoeff]
    ldr     r0, [sp, #l_coef_ptr]

    cmp     r1, #0                      ; check for nonzero token
    beq     SKIP_EOB_CHECK              ; if t is zero, we will skip the eob table check

    ; Nonzero token: read the sign with probability 128.
    sub     r3, r6, #1                  ; range - 1

    mov     r3, r3, lsl #7              ; *= onyx_prob_half (128)

    mov     r3, r3, lsr #8
    add     r2, r3, #1                  ; split

    subs    r3, r4, r2, lsl #24         ; x = value-(split<<24)
    movhs   r4, r3                      ; update value
    subhs   r2, r6, r2                  ; range = range - split
    mvnhs   r3, lr
    addhs   lr, r3, #1                  ; v = (v ^ -1) + 1
    movlo   r6, r2                      ; range = split

    ;; use branch for short pipelines ???
    ;; cmp r2, #0x80
    ;; bcs |$LN6@decode_mb_to|

    clz     r3, r2
    sub     r3, r3, #24
    subs    r5, r5, r3
    mov     r6, r2, lsl r3
    mov     r4, r4, lsl r3
    ldrleb  r2, [r8], #1
    addle   r5, r5, #8
    rsble   r3, r5, #8
    orrle   r4, r4, r2, lsl r3

    ;;|$LN6@decode_mb_to|
    add     r0, r0, #c_entropy_nodes    ; next context is 1 ...

    cmn     r1, #1

    addlt   r0, r0, #c_entropy_nodes    ; ... or 2 when t < -1

    mvn     r1, #1                      ; r1 = -2, so the next token starts at tree index 0

SKIP_EOB_CHECK
    ldr     r7, [sp, #l_c]
    ldr     r3, [r9, #detok_scan]
    add     r1, r1, #2                  ; after a zero token (t == 0) start at tree index 2
    cmp     r7, #(0x10 - 1)             ; assume one less for now.... increment below

    ldr     r3, [r3, +r7, lsl #2]       ; scan[c]
    add     r7, r7, #1                  ; ++c
    add     r3, r11, r3, lsl #1         ; &qcoeff[scan[c]]

    str     r7, [sp, #l_c]
    strh    lr, [r3]                    ; store v

    blt     COEFF_LOOP

    sub     r7, r7, #1                  ; if(t != -DCT_EOB_TOKEN) --c

END_OF_BLOCK
    ldr     r3, [sp, #l_type]
    ldr     r10, [sp, #l_coef_ptr]
    ldr     r0, [sp, #l_qcoeff]
    ldr     r11, [sp, #l_i]
    ldr     r12, [sp, #l_stop]

    cmp     r3, #0
    moveq   r1, #1                      ; r1 = !type
    movne   r1, #0
    add     r3, r11, r9

    cmp     r7, r1                      ; c != !type ?
    strb    r7, [r3, #detok_eob]        ; detoken->eob[i] = c

    ldr     r7, [sp, #l_l_ptr]
    ldr     r2, [sp, #l_a_ptr]
    movne   r3, #1
    moveq   r3, #0

    add     r0, r0, #0x20               ; qcoeff += 0x20 bytes (next block)
    add     r11, r11, #1                ; ++i
    str     r3, [r7]                    ; *l = (c != !type)
    str     r3, [r2]                    ; *a = (c != !type)
    str     r0, [sp, #l_qcoeff]
    str     r11, [sp, #l_i]

    cmp     r11, r12                    ; i >= stop ?
    ldr     r7, [sp, #l_type]

    blt     BLOCK_LOOP

    cmp     r11, #0x19
    bne     ln2_decode_mb_to

    ; Block 24 is finished: restart at block 0 with type 0.
    ldr     r12, [r9, #detok_qcoeff_start_ptr]
    ldr     r10, [r9, #detok_coef_probs]
    mov     r7, #0
    mov     r11, #0                     ; BLOCK_LOOP indexes with r11: keep it equal to l_i
    mov     r3, #0x10
    str     r12, [sp, #l_qcoeff]
    str     r7, [sp, #l_i]
    str     r7, [sp, #l_type]
    str     r3, [sp, #l_stop]

    str     r10, [sp, #l_coef_ptr]

    b       BLOCK_LOOP

ln2_decode_mb_to
    cmp     r11, #0x10
    bne     ln1_decode_mb_to

    ; Blocks 0..15 are finished: continue with blocks 16..23 as type 2.
    ldr     r10, [r9, #(detok_coef_probs + 8)]  ; detoken->coef_probs[2]

    mov     r7, #2
    mov     r3, #0x18

    str     r7, [sp, #l_type]
    str     r3, [sp, #l_stop]

    str     r10, [sp, #l_coef_ptr]
    b       BLOCK_LOOP

    ; All blocks decoded: write the bool decoder state back and return 0.
ln1_decode_mb_to
    ldr     r2, [sp, #l_bc]
    mov     r0, #0

    ldr     r3, [r2, #bool_decoder_buffer]
    str     r5, [r2, #bool_decoder_count]
    str     r4, [r2, #bool_decoder_value]
    sub     r3, r8, r3                  ; pos = read pointer - buffer
    str     r3, [r2, #bool_decoder_pos]
    str     r6, [r2, #bool_decoder_range]

    add     sp, sp, #l_stacksize
    ldmia   sp!, {r4 - r11, pc}

    ENDP    ; |vp8_decode_mb_tokens_v5|