2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
4 ; Use of this source code is governed by a BSD-style license
5 ; that can be found in the LICENSE file in the root of the source
6 ; tree. An additional intellectual property rights grant can be found
7 ; in the file PATENTS. All contributing project authors may
8 ; be found in the AUTHORS file in the root of the source tree.
12 EXPORT |vp8_yv12_extend_frame_borders_neon|
17 INCLUDE asm_com_offsets.asm
19 AREA ||.text||
, CODE
, READONLY
, ALIGN=2
20 ;void vp8_yv12_extend_frame_borders_neon (YV12_BUFFER_CONFIG *ybf);
21 ;Note: this is a VP8 function, which supports border=32 and border=16. Internal y_width and y_height
22 ; are always multiples of 16.
24 |vp8_yv12_extend_frame_borders_neon|
PROC
28 ;No need to load y_width, since y_width = y_stride - 2*border
29 ldr r3
, [r0
, #yv12_buffer_config_border
]
30 ldr r1
, [r0
, #yv12_buffer_config_y_buffer
] ;srcptr1
31 ldr r4
, [r0
, #yv12_buffer_config_y_height
]
32 ldr lr
, [r0
, #yv12_buffer_config_y_stride
]
35 beq b16_extend_frame_borders
37 ;=======================
38 b32_extend_frame_borders
40 ;=======================
41 ;Border copy for Y plane
42 ;copy the left and right most columns out
43 sub r5
, r1
, r3
;destptr1
45 sub r6
, r6
, r3
, lsl #
1 ;destptr2
46 sub r2
, r6
, #
1 ;srcptr2
48 ;Do four rows at one time
52 vld1.8
{d0[], d1[]}, [r1
], lr
53 vld1.8
{d4[], d5[]}, [r2
], lr
54 vld1.8
{d8[], d9[]}, [r1
], lr
55 vld1.8
{d12[], d13[]}, [r2
], lr
56 vld1.8
{d16[], d17[]}, [r1
], lr
57 vld1.8
{d20[], d21[]}, [r2
], lr
58 vld1.8
{d24[], d25[]}, [r1
], lr
59 vld1.8
{d28[], d29[]}, [r2
], lr
72 vst1.8
{q0, q1}, [r5
], lr
73 vst1.8
{q2, q3}, [r6
], lr
74 vst1.8
{q4, q5}, [r5
], lr
75 vst1.8
{q6, q7}, [r6
], lr
76 vst1.8
{q8, q9}, [r5
], lr
77 vst1.8
{q10, q11}, [r6
], lr
78 vst1.8
{q12, q13}, [r5
], lr
79 vst1.8
{q14, q15}, [r6
], lr
83 ;Now copy the top and bottom source lines into each line of the respective borders
84 ldr r7
, [r0
, #yv12_buffer_config_y_buffer
] ;srcptr1
89 sub r6
, r1
, r3
;destptr2
90 sub r2
, r6
, lr
;srcptr2
91 sub r1
, r7
, r3
;srcptr1
92 sub r5
, r1
, r8
;destptr1
95 vld1.8
{q0, q1}, [r1
]!
96 vld1.8
{q8, q9}, [r2
]!
97 vld1.8
{q2, q3}, [r1
]!
98 vld1.8
{q10, q11}, [r2
]!
99 vld1.8
{q4, q5}, [r1
]!
100 vld1.8
{q12, q13}, [r2
]!
101 vld1.8
{q6, q7}, [r1
]!
102 vld1.8
{q14, q15}, [r2
]!
109 vst1.8
{q0, q1}, [r5
]!
110 vst1.8
{q8, q9}, [r6
]!
111 vst1.8
{q2, q3}, [r5
]!
112 vst1.8
{q10, q11}, [r6
]!
113 vst1.8
{q4, q5}, [r5
]!
114 vst1.8
{q12, q13}, [r6
]!
115 vst1.8
{q6, q7}, [r5
]!
116 vst1.8
{q14, q15}, [r6
]!
129 bne copy_top_bottom_y
131 mov r7
, lr
, lsr #
4 ;check to see if extra copy is needed
133 bne extra_top_bottom_y
136 ;Border copy for U, V planes
137 ldr r1
, [r0
, #yv12_buffer_config_u_buffer
] ;srcptr1
138 mov lr
, lr
, lsr #
1 ;uv_stride
139 mov r3
, r3
, lsr #
1 ;border
140 mov r4
, r4
, lsr #
1 ;uv_height
145 ;copy the left and right most columns out
147 sub r5
, r1
, r3
;destptr1
149 sub r6
, r6
, r3
, lsl #
1 ;destptr2
150 sub r2
, r6
, #
1 ;srcptr2
154 ;Do eight rows at one time
158 vld1.8
{d0[], d1[]}, [r1
], lr
159 vld1.8
{d2[], d3[]}, [r2
], lr
160 vld1.8
{d4[], d5[]}, [r1
], lr
161 vld1.8
{d6[], d7[]}, [r2
], lr
162 vld1.8
{d8[], d9[]}, [r1
], lr
163 vld1.8
{d10[], d11[]}, [r2
], lr
164 vld1.8
{d12[], d13[]}, [r1
], lr
165 vld1.8
{d14[], d15[]}, [r2
], lr
166 vld1.8
{d16[], d17[]}, [r1
], lr
167 vld1.8
{d18[], d19[]}, [r2
], lr
168 vld1.8
{d20[], d21[]}, [r1
], lr
169 vld1.8
{d22[], d23[]}, [r2
], lr
170 vld1.8
{d24[], d25[]}, [r1
], lr
171 vld1.8
{d26[], d27[]}, [r2
], lr
172 vld1.8
{d28[], d29[]}, [r1
], lr
173 vld1.8
{d30[], d31[]}, [r2
], lr
177 vst1.8
{q0}, [r5
], lr
178 vst1.8
{q1}, [r6
], lr
179 vst1.8
{q2}, [r5
], lr
180 vst1.8
{q3}, [r6
], lr
181 vst1.8
{q4}, [r5
], lr
182 vst1.8
{q5}, [r6
], lr
183 vst1.8
{q6}, [r5
], lr
184 vst1.8
{q7}, [r6
], lr
185 vst1.8
{q8}, [r5
], lr
186 vst1.8
{q9}, [r6
], lr
187 vst1.8
{q10}, [r5
], lr
188 vst1.8
{q11}, [r6
], lr
189 vst1.8
{q12}, [r5
], lr
190 vst1.8
{q13}, [r6
], lr
191 vst1.8
{q14}, [r5
], lr
192 vst1.8
{q15}, [r6
], lr
194 bne copy_left_right_uv
196 ;Now copy the top and bottom source lines into each line of the respective borders
199 sub r6
, r1
, r3
;destptr2
200 sub r2
, r6
, lr
;srcptr2
201 sub r1
, r7
, r3
;srcptr1
202 sub r5
, r1
, r8
;destptr1
205 vld1.8
{q0, q1}, [r1
]!
206 vld1.8
{q8, q9}, [r2
]!
207 vld1.8
{q2, q3}, [r1
]!
208 vld1.8
{q10, q11}, [r2
]!
215 vst1.8
{q0, q1}, [r5
]!
216 vst1.8
{q8, q9}, [r6
]!
217 vst1.8
{q2, q3}, [r5
]!
218 vst1.8
{q10, q11}, [r6
]!
231 bne copy_top_bottom_uv
233 mov r7
, lr
, lsr #
3 ;check to see if extra copy is needed
235 bne extra_top_bottom_uv
237 end_of_border_copy_uv
239 ldrne r1
, [r0
, #yv12_buffer_config_v_buffer
] ;srcptr1
245 ;;;;;;;;;;;;;;;;;;;;;;
246 ;extra copy part for Y
256 vst1.8
{q0}, [r5
], lr
257 vst1.8
{q2}, [r6
], lr
258 vst1.8
{q0}, [r5
], lr
259 vst1.8
{q2}, [r6
], lr
260 vst1.8
{q0}, [r5
], lr
261 vst1.8
{q2}, [r6
], lr
262 vst1.8
{q0}, [r5
], lr
263 vst1.8
{q2}, [r6
], lr
264 vst1.8
{q0}, [r5
], lr
265 vst1.8
{q2}, [r6
], lr
266 vst1.8
{q0}, [r5
], lr
267 vst1.8
{q2}, [r6
], lr
268 vst1.8
{q0}, [r5
], lr
269 vst1.8
{q2}, [r6
], lr
270 vst1.8
{q0}, [r5
], lr
271 vst1.8
{q2}, [r6
], lr
272 bne extra_top_bottom_32
277 bne extra_top_bottom_y
279 b end_of_border_copy_y
281 ;extra copy part for UV
291 vst1.8
{d0}, [r5
], lr
292 vst1.8
{d8}, [r6
], lr
293 vst1.8
{d0}, [r5
], lr
294 vst1.8
{d8}, [r6
], lr
295 vst1.8
{d0}, [r5
], lr
296 vst1.8
{d8}, [r6
], lr
297 vst1.8
{d0}, [r5
], lr
298 vst1.8
{d8}, [r6
], lr
299 vst1.8
{d0}, [r5
], lr
300 vst1.8
{d8}, [r6
], lr
301 vst1.8
{d0}, [r5
], lr
302 vst1.8
{d8}, [r6
], lr
303 vst1.8
{d0}, [r5
], lr
304 vst1.8
{d8}, [r6
], lr
305 vst1.8
{d0}, [r5
], lr
306 vst1.8
{d8}, [r6
], lr
307 bne extra_top_bottom_16
312 bne extra_top_bottom_uv
314 b end_of_border_copy_uv
317 ;=======================
318 b16_extend_frame_borders
320 ;=======================
321 ;Border copy for Y plane
322 ;copy the left and right most columns out
323 sub r5
, r1
, r3
;destptr1
325 sub r6
, r6
, r3
, lsl #
1 ;destptr2
326 sub r2
, r6
, #
1 ;srcptr2
328 ;Do four rows at one time
331 copy_left_right_y_b16
332 vld1.8
{d0[], d1[]}, [r1
], lr
333 vld1.8
{d4[], d5[]}, [r2
], lr
334 vld1.8
{d8[], d9[]}, [r1
], lr
335 vld1.8
{d12[], d13[]}, [r2
], lr
336 vld1.8
{d16[], d17[]}, [r1
], lr
337 vld1.8
{d20[], d21[]}, [r2
], lr
338 vld1.8
{d24[], d25[]}, [r1
], lr
339 vld1.8
{d28[], d29[]}, [r2
], lr
343 vst1.8
{q0}, [r5
], lr
344 vst1.8
{q2}, [r6
], lr
345 vst1.8
{q4}, [r5
], lr
346 vst1.8
{q6}, [r6
], lr
347 vst1.8
{q8}, [r5
], lr
348 vst1.8
{q10}, [r6
], lr
349 vst1.8
{q12}, [r5
], lr
350 vst1.8
{q14}, [r6
], lr
352 bne copy_left_right_y_b16
354 ;Now copy the top and bottom source lines into each line of the respective borders
355 ldr r7
, [r0
, #yv12_buffer_config_y_buffer
] ;srcptr1
360 sub r6
, r1
, r3
;destptr2
361 sub r2
, r6
, lr
;srcptr2
362 sub r1
, r7
, r3
;srcptr1
363 sub r5
, r1
, r8
;destptr1
365 copy_top_bottom_y_b16
366 vld1.8
{q0, q1}, [r1
]!
367 vld1.8
{q8, q9}, [r2
]!
368 vld1.8
{q2, q3}, [r1
]!
369 vld1.8
{q10, q11}, [r2
]!
370 vld1.8
{q4, q5}, [r1
]!
371 vld1.8
{q12, q13}, [r2
]!
372 vld1.8
{q6, q7}, [r1
]!
373 vld1.8
{q14, q15}, [r2
]!
380 vst1.8
{q0, q1}, [r5
]!
381 vst1.8
{q8, q9}, [r6
]!
382 vst1.8
{q2, q3}, [r5
]!
383 vst1.8
{q10, q11}, [r6
]!
384 vst1.8
{q4, q5}, [r5
]!
385 vst1.8
{q12, q13}, [r6
]!
386 vst1.8
{q6, q7}, [r5
]!
387 vst1.8
{q14, q15}, [r6
]!
394 bne top_bottom_16_b16
400 bne copy_top_bottom_y_b16
402 mov r7
, lr
, lsr #
4 ;check to see if extra copy is needed
404 bne extra_top_bottom_y_b16
405 end_of_border_copy_y_b16
407 ;Border copy for U, V planes
408 ldr r1
, [r0
, #yv12_buffer_config_u_buffer
] ;srcptr1
409 mov lr
, lr
, lsr #
1 ;uv_stride
410 mov r3
, r3
, lsr #
1 ;border
411 mov r4
, r4
, lsr #
1 ;uv_height
416 ;copy the left and right most columns out
418 sub r5
, r1
, r3
;destptr1
420 sub r6
, r6
, r3
, lsl #
1 ;destptr2
421 sub r2
, r6
, #
1 ;srcptr2
425 ;Do eight rows at one time
428 copy_left_right_uv_b16
429 vld1.8
{d0[]}, [r1
], lr
430 vld1.8
{d2[]}, [r2
], lr
431 vld1.8
{d4[]}, [r1
], lr
432 vld1.8
{d6[]}, [r2
], lr
433 vld1.8
{d8[]}, [r1
], lr
434 vld1.8
{d10[]}, [r2
], lr
435 vld1.8
{d12[]}, [r1
], lr
436 vld1.8
{d14[]}, [r2
], lr
437 vld1.8
{d16[]}, [r1
], lr
438 vld1.8
{d18[]}, [r2
], lr
439 vld1.8
{d20[]}, [r1
], lr
440 vld1.8
{d22[]}, [r2
], lr
441 vld1.8
{d24[]}, [r1
], lr
442 vld1.8
{d26[]}, [r2
], lr
443 vld1.8
{d28[]}, [r1
], lr
444 vld1.8
{d30[]}, [r2
], lr
448 vst1.8
{d0}, [r5
], lr
449 vst1.8
{d2}, [r6
], lr
450 vst1.8
{d4}, [r5
], lr
451 vst1.8
{d6}, [r6
], lr
452 vst1.8
{d8}, [r5
], lr
453 vst1.8
{d10}, [r6
], lr
454 vst1.8
{d12}, [r5
], lr
455 vst1.8
{d14}, [r6
], lr
456 vst1.8
{d16}, [r5
], lr
457 vst1.8
{d18}, [r6
], lr
458 vst1.8
{d20}, [r5
], lr
459 vst1.8
{d22}, [r6
], lr
460 vst1.8
{d24}, [r5
], lr
461 vst1.8
{d26}, [r6
], lr
462 vst1.8
{d28}, [r5
], lr
463 vst1.8
{d30}, [r6
], lr
465 bne copy_left_right_uv_b16
467 ;Now copy the top and bottom source lines into each line of the respective borders
470 sub r6
, r1
, r3
;destptr2
471 sub r2
, r6
, lr
;srcptr2
472 sub r1
, r7
, r3
;srcptr1
473 sub r5
, r1
, r8
;destptr1
475 copy_top_bottom_uv_b16
476 vld1.8
{q0, q1}, [r1
]!
477 vld1.8
{q8, q9}, [r2
]!
478 vld1.8
{q2, q3}, [r1
]!
479 vld1.8
{q10, q11}, [r2
]!
486 vst1.8
{q0, q1}, [r5
]!
487 vst1.8
{q8, q9}, [r6
]!
488 vst1.8
{q2, q3}, [r5
]!
489 vst1.8
{q10, q11}, [r6
]!
502 bne copy_top_bottom_uv_b16
504 mov r7
, lr
, lsr #
3 ;check to see if extra copy is needed
506 bne extra_top_bottom_uv_b16
508 end_of_border_copy_uv_b16
510 ldrne r1
, [r0
, #yv12_buffer_config_v_buffer
] ;srcptr1
511 bne border_copy_uv_b16
516 ;;;;;;;;;;;;;;;;;;;;;;
517 ;extra copy part for Y
518 extra_top_bottom_y_b16
524 extra_top_bottom_16_b16
527 vst1.8
{q0}, [r5
], lr
528 vst1.8
{q2}, [r6
], lr
529 vst1.8
{q0}, [r5
], lr
530 vst1.8
{q2}, [r6
], lr
531 vst1.8
{q0}, [r5
], lr
532 vst1.8
{q2}, [r6
], lr
533 vst1.8
{q0}, [r5
], lr
534 vst1.8
{q2}, [r6
], lr
535 vst1.8
{q0}, [r5
], lr
536 vst1.8
{q2}, [r6
], lr
537 vst1.8
{q0}, [r5
], lr
538 vst1.8
{q2}, [r6
], lr
539 vst1.8
{q0}, [r5
], lr
540 vst1.8
{q2}, [r6
], lr
541 vst1.8
{q0}, [r5
], lr
542 vst1.8
{q2}, [r6
], lr
543 bne extra_top_bottom_16_b16
548 bne extra_top_bottom_y_b16
550 b end_of_border_copy_y_b16
552 ;extra copy part for UV
553 extra_top_bottom_uv_b16
559 extra_top_bottom_8_b16
562 vst1.8
{d0}, [r5
], lr
563 vst1.8
{d8}, [r6
], lr
564 vst1.8
{d0}, [r5
], lr
565 vst1.8
{d8}, [r6
], lr
566 vst1.8
{d0}, [r5
], lr
567 vst1.8
{d8}, [r6
], lr
568 vst1.8
{d0}, [r5
], lr
569 vst1.8
{d8}, [r6
], lr
570 vst1.8
{d0}, [r5
], lr
571 vst1.8
{d8}, [r6
], lr
572 vst1.8
{d0}, [r5
], lr
573 vst1.8
{d8}, [r6
], lr
574 vst1.8
{d0}, [r5
], lr
575 vst1.8
{d8}, [r6
], lr
576 vst1.8
{d0}, [r5
], lr
577 vst1.8
{d8}, [r6
], lr
578 bne extra_top_bottom_8_b16
583 bne extra_top_bottom_uv_b16
585 b end_of_border_copy_uv_b16