2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
4 ; Use of this source code is governed by a BSD-style license
5 ; that can be found in the LICENSE file in the root of the source
6 ; tree. An additional intellectual property rights grant can be found
7 ; in the file PATENTS. All contributing project authors may
8 ; be found in the AUTHORS file in the root of the source tree.
; Export the entry point so that other object files can link against it.
12 EXPORT |vp8cx_pack_tokens_into_partitions_armv5|
; Pull in asm_enc_offsets.asm. Presumably this supplies the structure offsets
; and sizes used below (vp8_writer_*, tokenextra_*, tokenlist_*, vp8_token_*,
; vp8_extra_bit_struct_*, TOKENEXTRA_SZ, TOKENLIST_SZ) -- TODO confirm.
14 INCLUDE asm_enc_offsets.asm
; Everything that follows is assembled into a read-only code area named |.text|.
20 AREA |.text|
, CODE
, READONLY
;------------------------------------------------------------------------------
; vp8cx_pack_tokens_into_partitions_armv5
;
; Walks the token lists reached through cpi->tp_list and writes each token
; (tree bits, then optional extra bits) through a vp8_writer into the output
; buffer, partition by partition, accumulating the bytes written per partition.
;
; Register arguments as used by the code below:
;   r0  base register for all vp8_writer_* accesses in the loops
;       NOTE(review): presumably enters as cpi and is re-pointed at the
;       writer; the instruction doing that is not visible here -- confirm.
;   r1  cx_data   (saved to [sp, #24]; later reused as the token pointer p)
;   r2  num_part  (saved to [sp, #20])
;   r3  *size     (saved to [sp, #8])
; Stack arguments as read by the code below:
;   [sp, #80] vp8_coef_encodings   [sp, #84] vp8_extra_bits
;   [sp, #88] vp8_coef_tree
;
; Local stack slots (names taken from the comments at each access):
;   [sp, #0]  stop              [sp, #4]  b->tree        [sp, #8]  *size
;   [sp, #12] mb_rows (loop)    [sp, #16] tokenlist addr [sp, #20] num_part
;   [sp, #24] cx_data           [sp, #28] i              [sp, #32] cpi->tp_list
;   [sp, #36] mb_rows           [sp, #40] ptr
;
; Working registers inside the packing loops:
;   r2 = lowvalue, r3 = count, r5 = range, r8 = n (bits left),
;   r12 = v (bits being coded, MSB first), lr = i (tree index), r1 = p.
;
; NOTE(review): this listing looks incomplete. Leading decimal numbers and
; instructions split across lines appear to be extraction artefacts, and the
; branch targets token_count_lt_zero, extra_bits_loop, extra_count_lt_zero,
; end_high_bit_not_set, end_zero_while_start, end_zero_while_loop and
; skip_write_partition have no visible label. Several conditional
; instructions (subne/addne/ldrge/beq) rely on flags whose setter is likewise
; not visible. Confirm against the full file before changing any code.
;------------------------------------------------------------------------------
23 ; r1 unsigned char *cx_data
26 ; s0 vp8_coef_encodings
28 ; s2 const vp8_tree_index *,
30 |vp8cx_pack_tokens_into_partitions_armv5|
PROC
34 ; Compute address of cpi->common.mb_rows
35 ldr r4
, _VP8_COMP_common_
36 ldr r6
, _VP8_COMMON_MBrows_
39 ldr r5
, [r4
, r6
] ; load up mb_rows
41 str r5
, [sp, #
36] ; save mb_rows
42 str r1
, [sp, #
24] ; save cx_data
43 str r2
, [sp, #
20] ; save num_part
44 str r3
, [sp, #
8] ; save *size
46 ; *size = 3*(num_part -1 );
47 sub r2
, r2
, #
1 ; num_part - 1
48 add r2
, r2
, r2
, lsl #
1 ; 3*(num_part - 1), computed as x + (x << 1)
51 add r2
, r2
, r1
; cx_data + *size
52 str r2
, [sp, #
40] ; ptr
54 ldr r4
, _VP8_COMP_tplist_
56 ldr r7
, [r4
, #
0] ; dereference cpi->tp_list
57 str r7
, [sp, #
32] ; store start of cpi->tp_list
59 ldr r11
, _VP8_COMP_bc2_
; load up vp8_writer out of cpi
63 str r11
, [sp, #
28] ; i
66 ldr r10
, [sp, #
40] ; ptr
67 ldr r5
, [sp, #
36] ; move mb_rows to the counting section
68 sub r5
, r5
, r11
; move start point with each partition
72 ; Reset the VP8 writer state (lowvalue, range, count, value, pos, buffer) for each partition
75 mov r2
, #
0 ; vp8_writer_lowvalue
76 mov r5
, #
255 ; vp8_writer_range
77 mvn r3
, #
23 ; vp8_writer_count = ~23 = -24
79 str r2
, [r0
, #vp8_writer_value
]
80 str r2
, [r0
, #vp8_writer_pos
]
81 str r10
, [r0
, #vp8_writer_buffer
]
; Fetch the [start, stop) token range of the current tokenlist element (r7).
85 ldr r1
, [r7
, #tokenlist_start
]
86 ldr r9
, [r7
, #tokenlist_stop
]
87 str r9
, [sp, #
0] ; save stop for later comparison
88 str r7
, [sp, #
16] ; tokenlist address for next time
92 ; actual work gets done here!
; Fetch token t from *p and look up its encoding entry (value v, length n).
95 ldrb r6
, [r1
, #tokenextra_token
] ; t
96 ldr r4
, [sp, #
80] ; vp8_coef_encodings
98 add r4
, r4
, r6
, lsl #
3 ; a = vp8_coef_encodings + t (index scaled by 8)
99 ldr r9
, [r1
, #tokenextra_context_tree
] ; pp
101 ldrb r7
, [r1
, #tokenextra_skip_eob_node
]
103 ldr r6
, [r4
, #vp8_token_value
] ; v
104 ldr r8
, [r4
, #vp8_token_len
] ; n
106 ; vp8 specific skip_eob_node
109 subne r8
, r8
, #
1 ; --n
111 rsb r4
, r8
, #
32 ; 32-n
112 ldr r10
, [sp, #
88] ; vp8_coef_tree
114 ; v is kept in r12 during the token pack loop
115 lsl r12
, r6
, r4
; r12 = v << (32 - n), so the next bit to code is bit 31
; Tree walk: each pass codes one bit of v; lr is the tree index i.
119 ldrb r4
, [r9
, lr
, asr #
1] ; pp [i>>1]
120 sub r7
, r5
, #
1 ; range-1
122 ; Decisions are made based on the bit value shifted
123 ; off of v, so set a flag here based on this.
124 ; This value is referred to as "bb"
125 lsls r12
, r12
, #
1 ; bb = top bit of v, shifted out into the carry flag
126 mul r4
, r4
, r7
; ((range-1) * pp[i>>1]))
128 ; bb can only be 0 or 1. So only execute this statement
129 ; if bb == 1, otherwise it will act like i + 0
130 addcs lr
, lr
, #
1 ; i + bb
133 ldrsb lr
, [r10
, lr
] ; i = vp8_coef_tree[i+bb]
134 add r4
, r7
, r4
, lsr #
8 ; 1 + (((range-1) * pp[i>>1]) >> 8)
136 addcs r2
, r2
, r4
; if (bb) lowvalue += split
137 subcs r4
, r5
, r4
; if (bb) range = range-split
139 ; Counting the leading zeros is used to normalize range.
141 sub r6
, r6
, #
24 ; shift
143 ; Flag is set on the sum of count. This flag is used later
144 ; to determine if count >= 0
145 adds r3
, r3
, r6
; count += shift
146 lsl r5
, r4
, r6
; range <<= shift
147 bmi token_count_lt_zero
; if(count >= 0)
; count >= 0: enough bits have accumulated to emit one byte of lowvalue.
149 sub r6
, r6
, r3
; offset = shift - count
150 sub r4
, r6
, #
1 ; offset-1
151 lsls r4
, r2
, r4
; if((lowvalue<<(offset-1)) & 0x80000000 )
152 bpl token_high_bit_not_set
; High bit set: revisit bytes already written, starting at x = w->pos-1
; (zeroing loop, then the "+ 1" store, as described by the comments below).
154 ldr r4
, [r0
, #vp8_writer_pos
] ; x
155 sub r4
, r4
, #
1 ; x = w->pos-1
156 b token_zero_while_start
157 token_zero_while_loop
159 strb r10
, [r7
, r4
] ; w->buffer[x] =(unsigned char)0
161 token_zero_while_start
163 ldrge r7
, [r0
, #vp8_writer_buffer
]
166 beq token_zero_while_loop
168 ldr r7
, [r0
, #vp8_writer_buffer
]
169 ldrb r10
, [r7
, r4
] ; w->buffer[x]
171 strb r10
, [r7
, r4
] ; w->buffer[x] + 1
172 token_high_bit_not_set
173 rsb r4
, r6
, #
24 ; 24-offset
174 ldr r10
, [r0
, #vp8_writer_buffer
]
175 lsr r7
, r2
, r4
; lowvalue >> (24-offset)
176 ldr r4
, [r0
, #vp8_writer_pos
] ; w->pos
177 lsl r2
, r2
, r6
; lowvalue <<= offset
178 mov r6
, r3
; shift = count
179 add r11
, r4
, #
1 ; w->pos++
180 bic r2
, r2
, #
0xff000000 ; lowvalue &= 0xffffff
181 str r11
, [r0
, #vp8_writer_pos
]
182 sub r3
, r3
, #
8 ; count -= 8
183 strb r7
, [r10
, r4
] ; w->buffer[w->pos++]
185 ; r10 holds vp8_coef_tree earlier in the loop, but it is used as a
186 ; temporary (the writer buffer pointer) here. So after that use,
187 ; reload vp8_coef_tree into r10
188 ldr r10
, [sp, #
88] ; vp8_coef_tree
191 lsl r2
, r2
, r6
; lowvalue <<= shift
193 subs r8
, r8
, #
1 ; --n
; Extra bits: look up b = vp8_extra_bits[t] and code e = p->Extra with b's tree.
196 ldrb r6
, [r1
, #tokenextra_token
] ; t
197 ldr r7
, [sp, #
84] ; vp8_extra_bits
198 ; Add t * sizeof (vp8_extra_bit_struct) to get the desired
199 ; element. Here sizeof (vp8_extra_bit_struct) == 16 (hence lsl #4)
200 add r12
, r7
, r6
, lsl #
4 ; b = vp8_extra_bits + t
202 ldr r4
, [r12
, #vp8_extra_bit_struct_base_val
]
207 ldr r8
, [r12
, #vp8_extra_bit_struct_len
] ; L
208 ldrsh lr
, [r1
, #tokenextra_extra
] ; e = p->Extra
212 ldr r9
, [r12
, #vp8_extra_bit_struct_prob
]
213 asr r7
, lr
, #
1 ; v=e>>1
215 ldr r10
, [r12
, #vp8_extra_bit_struct_tree
]
216 str r10
, [sp, #
4] ; b->tree
; Extra-bits tree walk: same per-bit sequence as the token tree walk above.
224 ldrb r4
, [r9
, lr
, asr #
1] ; pp[i>>1]
225 sub r7
, r5
, #
1 ; range-1
226 lsls r12
, r12
, #
1 ; bb = top bit of v, shifted out into the carry flag
227 mul r4
, r4
, r7
; (range-1) * pp[i>>1]
228 addcs lr
, lr
, #
1 ; i + bb
231 ldrsb lr
, [r10
, lr
] ; i = b->tree[i+bb]
232 add r4
, r7
, r4
, lsr #
8 ; split = 1 + (((range-1) * pp[i>>1]) >> 8)
234 addcs r2
, r2
, r4
; if (bb) lowvalue += split
235 subcs r4
, r5
, r4
; if (bb) range = range-split
240 adds r3
, r3
, r6
; count += shift
241 lsl r5
, r4
, r6
; range <<= shift
242 bmi extra_count_lt_zero
; if(count >= 0)
244 sub r6
, r6
, r3
; offset= shift - count
245 sub r4
, r6
, #
1 ; offset-1
246 lsls r4
, r2
, r4
; if((lowvalue<<(offset-1)) & 0x80000000 )
247 bpl extra_high_bit_not_set
249 ldr r4
, [r0
, #vp8_writer_pos
] ; x
250 sub r4
, r4
, #
1 ; x = w->pos - 1
251 b extra_zero_while_start
252 extra_zero_while_loop
254 strb r10
, [r7
, r4
] ; w->buffer[x] =(unsigned char)0
256 extra_zero_while_start
258 ldrge r7
, [r0
, #vp8_writer_buffer
]
261 beq extra_zero_while_loop
263 ldr r7
, [r0
, #vp8_writer_buffer
]
267 extra_high_bit_not_set
268 rsb r4
, r6
, #
24 ; 24-offset
269 ldr r10
, [r0
, #vp8_writer_buffer
]
270 lsr r7
, r2
, r4
; lowvalue >> (24-offset)
271 ldr r4
, [r0
, #vp8_writer_pos
]
272 lsl r2
, r2
, r6
; lowvalue <<= offset
273 mov r6
, r3
; shift = count
274 add r11
, r4
, #
1 ; w->pos++
275 bic r2
, r2
, #
0xff000000 ; lowvalue &= 0xffffff
276 str r11
, [r0
, #vp8_writer_pos
]
277 sub r3
, r3
, #
8 ; count -= 8
278 strb r7
, [r10
, r4
] ; w->buffer[w->pos++]=(lowvalue >> (24-offset))
279 ldr r10
, [sp, #
4] ; b->tree (r10 was used as a temporary above)
283 subs r8
, r8
, #
1 ; --n
284 bne extra_bits_loop
; while (n)
; Final extra bit, coded with split = (range + 1) >> 1.
; NOTE(review): this reload of p->Extra uses a hard-coded offset of 4 and a
; 32-bit ldr, whereas the earlier load uses "ldrsh ... #tokenextra_extra".
; Confirm that tokenextra_extra == 4 and that the wider load is intended.
287 ldr lr
, [r1
, #
4] ; e = p->Extra
288 add r4
, r5
, #
1 ; range + 1
290 lsr r4
, r4
, #
1 ; split = (range + 1) >> 1
291 addne r2
, r2
, r4
; lowvalue += split
292 subne r4
, r5
, r4
; range = range-split
293 tst r2
, #
0x80000000 ; lowvalue & 0x80000000
294 lsl r5
, r4
, #
1 ; range <<= 1
295 beq end_high_bit_not_set
297 ldr r4
, [r0
, #vp8_writer_pos
]
300 b end_zero_while_start
306 ldrge r6
, [r0
, #vp8_writer_buffer
]
309 beq end_zero_while_loop
311 ldr r6
, [r0
, #vp8_writer_buffer
]
316 adds r3
, r3
, #
1 ; ++count
317 lsl r2
, r2
, #
1 ; lowvalue <<= 1
320 ldr r4
, [r0
, #vp8_writer_pos
]
322 ldr r7
, [r0
, #vp8_writer_buffer
]
323 lsr r6
, r2
, #
24 ; lowvalue >> 24
324 add r12
, r4
, #
1 ; w->pos++
325 bic r2
, r2
, #
0xff000000 ; lowvalue &= 0xffffff
; Advance to the next token and test against the saved stop pointer.
330 add r1
, r1
, #TOKENEXTRA_SZ
; ++p
332 ldr r4
, [sp, #
0] ; stop
333 cmp r1
, r4
; while( p < stop)
336 ldr r10
, [sp, #
20] ; num_parts
337 mov r1
, #TOKENLIST_SZ
; NOTE(review): mb_rows was saved at [sp, #36] on entry; no store to
; [sp, #12] is visible in this listing -- confirm where this slot is written.
340 ldr r6
, [sp, #
12] ; mb_rows
341 ldr r7
, [sp, #
16] ; tokenlist address
343 add r7
, r7
, r1
; next element in the array
; "_se" section: the same split / normalise / emit-byte sequence as the token
; path above, but with the probability fixed at 128 (the lsl #7 below).
350 sub r7
, r5
, #
1 ; range-1
352 mov r4
, r7
, lsl #
7 ; ((range-1) * 128)
355 add r4
, r7
, r4
, lsr #
8 ; 1 + (((range-1) * 128) >> 8)
357 ; Counting the leading zeros is used to normalize range.
359 sub r6
, r6
, #
24 ; shift
361 ; Flag is set on the sum of count. This flag is used later
362 ; to determine if count >= 0
363 adds r3
, r3
, r6
; count += shift
364 lsl r5
, r4
, r6
; range <<= shift
365 bmi token_count_lt_zero_se
; if(count >= 0)
367 sub r6
, r6
, r3
; offset = shift - count
368 sub r4
, r6
, #
1 ; offset-1
369 lsls r4
, r2
, r4
; if((lowvalue<<(offset-1)) & 0x80000000 )
370 bpl token_high_bit_not_set_se
372 ldr r4
, [r0
, #vp8_writer_pos
] ; x
373 sub r4
, r4
, #
1 ; x = w->pos-1
374 b token_zero_while_start_se
375 token_zero_while_loop_se
377 strb r10
, [r7
, r4
] ; w->buffer[x] =(unsigned char)0
379 token_zero_while_start_se
381 ldrge r7
, [r0
, #vp8_writer_buffer
]
384 beq token_zero_while_loop_se
386 ldr r7
, [r0
, #vp8_writer_buffer
]
387 ldrb r10
, [r7
, r4
] ; w->buffer[x]
389 strb r10
, [r7
, r4
] ; w->buffer[x] + 1
390 token_high_bit_not_set_se
391 rsb r4
, r6
, #
24 ; 24-offset
392 ldr r10
, [r0
, #vp8_writer_buffer
]
393 lsr r7
, r2
, r4
; lowvalue >> (24-offset)
394 ldr r4
, [r0
, #vp8_writer_pos
] ; w->pos
395 lsl r2
, r2
, r6
; lowvalue <<= offset
396 mov r6
, r3
; shift = count
397 add r11
, r4
, #
1 ; w->pos++
398 bic r2
, r2
, #
0xff000000 ; lowvalue &= 0xffffff
399 str r11
, [r0
, #vp8_writer_pos
]
400 sub r3
, r3
, #
8 ; count -= 8
401 strb r7
, [r10
, r4
] ; w->buffer[w->pos++]
403 token_count_lt_zero_se
404 lsl r2
, r2
, r6
; lowvalue <<= shift
; Partition finished: account for the bytes written (w->pos) in *size.
409 ldr r10
, [sp, #
8] ; *size
411 ldr r4
, [r0
, #vp8_writer_pos
] ; w->pos
412 add r11
, r11
, r4
; *size += w->pos
415 ldr r9
, [sp, #
20] ; num_parts
417 ldr r10
, [sp, #
28] ; i
418 cmp r10
, r9
; if(i<(num_part - 1))
419 bge skip_write_partition
421 ldr r12
, [sp, #
40] ; ptr
422 add r12
, r12
, r4
; ptr += w->pos
425 ldr r9
, [sp, #
24] ; cx_data
432 add r9
, r9
, #
3 ; cx_data += 3
; Move on to the next partition index and the next cpi->tp_list element.
437 ldr r11
, [sp, #
28] ; i
438 ldr r10
, [sp, #
20] ; num_parts
440 add r11
, r11
, #
1 ; i++
443 ldr r7
, [sp, #
32] ; cpi->tp_list[i]
444 mov r1
, #TOKENLIST_SZ
445 add r7
, r7
, r1
; next element in cpi->tp_list
446 str r7
, [sp, #
32] ; cpi->tp_list[i+1]
; Literal-pool word holding the value of vp8_common_mb_rows.
; NOTE(review): presumably this is the word behind the _VP8_COMMON_MBrows_
; label that the routine reads with "ldr r6, _VP8_COMMON_MBrows_" and uses as
; the offset of mb_rows; the label line itself is not visible here -- confirm.
459 DCD vp8_common_mb_rows