2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
4 ; Use of this source code is governed by a BSD-style license
5 ; that can be found in the LICENSE file in the root of the source
6 ; tree. An additional intellectual property rights grant can be found
7 ; in the file PATENTS. All contributing project authors may
8 ; be found in the AUTHORS file in the root of the source tree.
12 EXPORT |vp8_yv12_copy_frame_yonly_neon|
13 EXPORT |vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon|
19 INCLUDE asm_com_offsets.asm
21 AREA ||.text||
, CODE
, READONLY
, ALIGN=2
22 ;void vp8_yv12_copy_frame_yonly_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc);
23 ; Note: this is a VP8 function, which handles border=32 and border=16. Internal y_width and y_height
24 ; are always multiples of 16.
; Y-plane copy stage of vp8_yv12_copy_frame_yonly_neon.
; r0 is the source buffer config and r1 the destination buffer config
; (see the ;srcptr1 / ;dstptr1 loads below).
; NOTE(review): this listing looks incomplete - no register save, no setup of
; r8-r11, and no instruction that sets the flags tested by the loop-closing
; bne branches is visible here. Confirm against the upstream file.
26 |vp8_yv12_copy_frame_yonly_neon|
PROC
; r4 = source Y height, r5 = source Y width.
30 ldr r4
, [r0
, #yv12_buffer_config_y_height
]
31 ldr r5
, [r0
, #yv12_buffer_config_y_width
]
; r6 = source Y stride, r7 = destination Y stride.
32 ldr r6
, [r0
, #yv12_buffer_config_y_stride
]
33 ldr r7
, [r1
, #yv12_buffer_config_y_stride
]
; r2 = source Y buffer base, r3 = destination Y buffer base.
34 ldr r2
, [r0
, #yv12_buffer_config_y_buffer
] ;srcptr1
35 ldr r3
, [r1
, #yv12_buffer_config_y_buffer
] ;dstptr1
37 ; copy two rows at one time
40 cp_src_to_dst_height_loop
47 cp_src_to_dst_width_loop
; One pass reads 128 bytes from [r8] into q0-q7 and 128 bytes from [r10]
; into q8-q15; each load post-increments its pointer by 32 bytes.
; Presumably r8 and r10 address two adjacent source rows - their setup is
; not visible in this listing (TODO confirm).
48 vld1.8
{q0, q1}, [r8
]!
49 vld1.8
{q8, q9}, [r10
]!
50 vld1.8
{q2, q3}, [r8
]!
51 vld1.8
{q10, q11}, [r10
]!
52 vld1.8
{q4, q5}, [r8
]!
53 vld1.8
{q12, q13}, [r10
]!
54 vld1.8
{q6, q7}, [r8
]!
55 vld1.8
{q14, q15}, [r10
]!
; Write the same 2 x 128 bytes out: q0-q7 to [r9], q8-q15 to [r11],
; again post-incrementing by 32 bytes per store.
59 vst1.8
{q0, q1}, [r9
]!
60 vst1.8
{q8, q9}, [r11
]!
61 vst1.8
{q2, q3}, [r9
]!
62 vst1.8
{q10, q11}, [r11
]!
63 vst1.8
{q4, q5}, [r9
]!
64 vst1.8
{q12, q13}, [r11
]!
65 vst1.8
{q6, q7}, [r9
]!
66 vst1.8
{q14, q15}, [r11
]!
68 bne cp_src_to_dst_width_loop
; Step both base pointers down two rows:
; r2 += 2 * source stride, r3 += 2 * destination stride.
71 add r2
, r2
, r6
, lsl #
1
72 add r3
, r3
, r7
, lsl #
1
74 bne cp_src_to_dst_height_loop
; r10 = y_width & 0x7f, i.e. the bytes per row left over after whole
; 128-byte passes. The Z flag set here is what the bne after the two ldr
; instructions tests (ldr does not modify the flags).
76 ands r10
, r5
, #
0x7f ;check to see if extra copy is needed
; Reload the buffer bases, since r2/r3 were advanced by the row loop above.
78 ldr r2
, [r0
, #yv12_buffer_config_y_buffer
] ;srcptr1
79 ldr r3
, [r1
, #yv12_buffer_config_y_buffer
] ;dstptr1
80 bne extra_cp_src_to_dst_width
; Border-extension stage: loads the geometry of the buffer config in r0,
; dispatches to the b16 path, or falls through into the b32 path below.
; NOTE(review): several instructions and branch-target labels of this stage
; are not visible in this listing (see the individual notes); confirm
; against the upstream file.
84 ;vpxyv12_extend_frame_borders_yonly
86 ;No need to load y_width, since y_width = y_stride - 2*border
; r3 = border, r1 = Y buffer base, r4 = Y height, lr = Y stride.
87 ldr r3
, [r0
, #yv12_buffer_config_border
]
88 ldr r1
, [r0
, #yv12_buffer_config_y_buffer
] ;srcptr1
89 ldr r4
, [r0
, #yv12_buffer_config_y_height
]
90 ldr lr
, [r0
, #yv12_buffer_config_y_stride
]
; NOTE(review): the compare feeding this beq is not visible here; going by
; the label names it presumably selects the 16-pixel border path.
93 beq b16_extend_frame_borders
95 ;=======================
96 b32_extend_frame_borders
98 ;=======================
99 ;Border copy for Y plane
100 ;copy the left and right most columns out
; r5 = y_buffer - border.
101 sub r5
, r1
, r3
;destptr1
; r6 -= 2 * border. NOTE(review): the instruction that gives r6 its value
; for this stage is not visible in this listing.
103 sub r6
, r6
, r3
, lsl #
1 ;destptr2
; r2 = r6 - 1.
104 sub r2
, r6
, #
1 ;srcptr2
106 ;Do four rows at one time
; Each vld1.8 {dN[], dN+1[]} reads a single byte and replicates it across a
; whole q register, then advances the pointer by lr (y_stride): four rows
; from [r1] (q0, q4, q8, q12) and four rows from [r2] (q2, q6, q10, q14).
110 vld1.8
{d0[], d1[]}, [r1
], lr
111 vld1.8
{d4[], d5[]}, [r2
], lr
112 vld1.8
{d8[], d9[]}, [r1
], lr
113 vld1.8
{d12[], d13[]}, [r2
], lr
114 vld1.8
{d16[], d17[]}, [r1
], lr
115 vld1.8
{d20[], d21[]}, [r2
], lr
116 vld1.8
{d24[], d25[]}, [r1
], lr
117 vld1.8
{d28[], d29[]}, [r2
], lr
; Store 32 bytes per row to [r5] and [r6], advancing each by lr.
; NOTE(review): the odd q registers (q1, q3, ...) are not written by any
; visible instruction - presumably they are copies of the even ones made in
; lines missing from this listing.
130 vst1.8
{q0, q1}, [r5
], lr
131 vst1.8
{q2, q3}, [r6
], lr
132 vst1.8
{q4, q5}, [r5
], lr
133 vst1.8
{q6, q7}, [r6
], lr
134 vst1.8
{q8, q9}, [r5
], lr
135 vst1.8
{q10, q11}, [r6
], lr
136 vst1.8
{q12, q13}, [r5
], lr
137 vst1.8
{q14, q15}, [r6
], lr
; NOTE(review): neither the copy_left_right_y label nor the row-counter
; update that sets the flags for this bne is visible here.
139 bne copy_left_right_y
141 ;Now copy the top and bottom source lines into each line of the respective borders
; r7 = Y buffer base (reloaded).
142 ldr r7
, [r0
, #yv12_buffer_config_y_buffer
] ;srcptr1
; r1 was advanced by the left/right loads above. From it:
; r6 = r1 - border, r2 = r6 - y_stride, r1 = y_buffer - border, r5 = r1 - r8.
; NOTE(review): r8 is not set by any visible instruction.
147 sub r6
, r1
, r3
;destptr2
148 sub r2
, r6
, lr
;srcptr2
149 sub r1
, r7
, r3
;srcptr1
150 sub r5
, r1
, r8
;destptr1
; Read 128 bytes from [r1] into q0-q7 and 128 bytes from [r2] into q8-q15,
; post-incrementing each pointer by 32 bytes per load.
153 vld1.8
{q0, q1}, [r1
]!
154 vld1.8
{q8, q9}, [r2
]!
155 vld1.8
{q2, q3}, [r1
]!
156 vld1.8
{q10, q11}, [r2
]!
157 vld1.8
{q4, q5}, [r1
]!
158 vld1.8
{q12, q13}, [r2
]!
159 vld1.8
{q6, q7}, [r1
]!
160 vld1.8
{q14, q15}, [r2
]!
; Write q0-q7 to [r5] and q8-q15 to [r6], 32 bytes per store.
167 vst1.8
{q0, q1}, [r5
]!
168 vst1.8
{q8, q9}, [r6
]!
169 vst1.8
{q2, q3}, [r5
]!
170 vst1.8
{q10, q11}, [r6
]!
171 vst1.8
{q4, q5}, [r5
]!
172 vst1.8
{q12, q13}, [r6
]!
173 vst1.8
{q6, q7}, [r5
]!
174 vst1.8
{q14, q15}, [r6
]!
; NOTE(review): the copy_top_bottom_y label and the counter update that
; sets the flags for this bne are not visible here.
187 bne copy_top_bottom_y
; r7 = y_stride / 16. This mov does not set the flags, so the bne below
; must depend on an instruction that is missing from this listing.
189 mov r7
, lr
, lsr #
4 ;check to see if extra copy is needed
191 bne extra_top_bottom_y
197 ;=====================
198 ;extra copy part for Y
; Write the 16 bytes in q0 to eight successive rows at [r5] and the 16 bytes
; in q2 to eight successive rows at [r6]; every store advances by lr.
; NOTE(review): the loads that fill q0/q2 for this part are not visible.
208 vst1.8
{q0}, [r5
], lr
209 vst1.8
{q2}, [r6
], lr
210 vst1.8
{q0}, [r5
], lr
211 vst1.8
{q2}, [r6
], lr
212 vst1.8
{q0}, [r5
], lr
213 vst1.8
{q2}, [r6
], lr
214 vst1.8
{q0}, [r5
], lr
215 vst1.8
{q2}, [r6
], lr
216 vst1.8
{q0}, [r5
], lr
217 vst1.8
{q2}, [r6
], lr
218 vst1.8
{q0}, [r5
], lr
219 vst1.8
{q2}, [r6
], lr
220 vst1.8
{q0}, [r5
], lr
221 vst1.8
{q2}, [r6
], lr
222 vst1.8
{q0}, [r5
], lr
223 vst1.8
{q2}, [r6
], lr
; NOTE(review): the labels extra_top_bottom_32, extra_top_bottom_y and
; end_of_border_copy_y are not defined anywhere in this listing.
224 bne extra_top_bottom_32
229 bne extra_top_bottom_y
231 b end_of_border_copy_y
; 16-pixel border path, reached through the beq that follows the
; border / y_buffer / y_height / y_stride loads. On entry r1 = Y buffer base,
; r3 = border and lr = y_stride, as loaded before that branch.
; NOTE(review): loop counters, flag-setting instructions and the function
; exit are not visible in this listing; confirm against the upstream file.
234 ;=======================
235 b16_extend_frame_borders
237 ;=======================
238 ;Border copy for Y plane
239 ;copy the left and right most columns out
; r5 = y_buffer - border.
240 sub r5
, r1
, r3
;destptr1
; r6 -= 2 * border. NOTE(review): the instruction that gives r6 its value
; for this stage is not visible in this listing.
242 sub r6
, r6
, r3
, lsl #
1 ;destptr2
; r2 = r6 - 1.
243 sub r2
, r6
, #
1 ;srcptr2
245 ;Do four rows at one time
248 copy_left_right_y_b16
; Each vld1.8 {dN[], dN+1[]} reads a single byte and replicates it across a
; whole q register, then advances the pointer by lr (y_stride): four rows
; from [r1] (q0, q4, q8, q12) and four rows from [r2] (q2, q6, q10, q14).
249 vld1.8
{d0[], d1[]}, [r1
], lr
250 vld1.8
{d4[], d5[]}, [r2
], lr
251 vld1.8
{d8[], d9[]}, [r1
], lr
252 vld1.8
{d12[], d13[]}, [r2
], lr
253 vld1.8
{d16[], d17[]}, [r1
], lr
254 vld1.8
{d20[], d21[]}, [r2
], lr
255 vld1.8
{d24[], d25[]}, [r1
], lr
256 vld1.8
{d28[], d29[]}, [r2
], lr
; Store one q register (16 bytes) per row to [r5] and [r6], advancing each
; pointer by lr after every store.
260 vst1.8
{q0}, [r5
], lr
261 vst1.8
{q2}, [r6
], lr
262 vst1.8
{q4}, [r5
], lr
263 vst1.8
{q6}, [r6
], lr
264 vst1.8
{q8}, [r5
], lr
265 vst1.8
{q10}, [r6
], lr
266 vst1.8
{q12}, [r5
], lr
267 vst1.8
{q14}, [r6
], lr
; NOTE(review): the row-counter update that sets the flags for this bne is
; not visible here.
269 bne copy_left_right_y_b16
271 ;Now copy the top and bottom source lines into each line of the respective borders
; r7 = Y buffer base (reloaded).
272 ldr r7
, [r0
, #yv12_buffer_config_y_buffer
] ;srcptr1
; r1 was advanced by the left/right loads above. From it:
; r6 = r1 - border, r2 = r6 - y_stride, r1 = y_buffer - border, r5 = r1 - r8.
; NOTE(review): r8 is not set by any visible instruction.
277 sub r6
, r1
, r3
;destptr2
278 sub r2
, r6
, lr
;srcptr2
279 sub r1
, r7
, r3
;srcptr1
280 sub r5
, r1
, r8
;destptr1
282 copy_top_bottom_y_b16
; Read 128 bytes from [r1] into q0-q7 and 128 bytes from [r2] into q8-q15,
; post-incrementing each pointer by 32 bytes per load.
283 vld1.8
{q0, q1}, [r1
]!
284 vld1.8
{q8, q9}, [r2
]!
285 vld1.8
{q2, q3}, [r1
]!
286 vld1.8
{q10, q11}, [r2
]!
287 vld1.8
{q4, q5}, [r1
]!
288 vld1.8
{q12, q13}, [r2
]!
289 vld1.8
{q6, q7}, [r1
]!
290 vld1.8
{q14, q15}, [r2
]!
; Write q0-q7 to [r5] and q8-q15 to [r6], 32 bytes per store.
297 vst1.8
{q0, q1}, [r5
]!
298 vst1.8
{q8, q9}, [r6
]!
299 vst1.8
{q2, q3}, [r5
]!
300 vst1.8
{q10, q11}, [r6
]!
301 vst1.8
{q4, q5}, [r5
]!
302 vst1.8
{q12, q13}, [r6
]!
303 vst1.8
{q6, q7}, [r5
]!
304 vst1.8
{q14, q15}, [r6
]!
; NOTE(review): the label top_bottom_16_b16 is not defined anywhere in this
; listing, and the counter updates feeding both bne branches are missing.
311 bne top_bottom_16_b16
317 bne copy_top_bottom_y_b16
; r7 = y_stride / 16. This mov does not set the flags, so the bne below
; must depend on an instruction that is missing from this listing.
319 mov r7
, lr
, lsr #
4 ;check to see if extra copy is needed
321 bne extra_top_bottom_y_b16
; NOTE(review): no instructions are visible after this label, so as listed
; control would fall straight into the extra-copy code below.
322 end_of_border_copy_y_b16
327 ;=====================
328 ;extra copy part for Y
329 extra_top_bottom_y_b16
335 extra_top_bottom_16_b16
; Write the 16 bytes in q0 to eight successive rows at [r5] and the 16 bytes
; in q2 to eight successive rows at [r6]; every store advances by lr.
; NOTE(review): the loads that fill q0/q2 for this part are not visible.
338 vst1.8
{q0}, [r5
], lr
339 vst1.8
{q2}, [r6
], lr
340 vst1.8
{q0}, [r5
], lr
341 vst1.8
{q2}, [r6
], lr
342 vst1.8
{q0}, [r5
], lr
343 vst1.8
{q2}, [r6
], lr
344 vst1.8
{q0}, [r5
], lr
345 vst1.8
{q2}, [r6
], lr
346 vst1.8
{q0}, [r5
], lr
347 vst1.8
{q2}, [r6
], lr
348 vst1.8
{q0}, [r5
], lr
349 vst1.8
{q2}, [r6
], lr
350 vst1.8
{q0}, [r5
], lr
351 vst1.8
{q2}, [r6
], lr
352 vst1.8
{q0}, [r5
], lr
353 vst1.8
{q2}, [r6
], lr
; Loop back while the (not visible) counters are non-zero, then return to
; the end-of-border label.
354 bne extra_top_bottom_16_b16
359 bne extra_top_bottom_y_b16
361 b end_of_border_copy_y_b16
; Remainder-width copy for vp8_yv12_copy_frame_yonly_neon. It is entered by
; the bne that follows "ands r10, r5, #0x7f", i.e. when y_width is not a
; multiple of 128 bytes.
; NOTE(review): the load/store body of both loops and the counter updates
; that set the flags for the bne branches are not visible in this listing;
; confirm against the upstream file.
363 ;=============================
364 extra_cp_src_to_dst_width
371 extra_cp_src_to_dst_height_loop
379 extra_cp_src_to_dst_width_loop
387 bne extra_cp_src_to_dst_width_loop
; Step both base pointers down two rows:
; r2 += 2 * r6 (source stride), r3 += 2 * r7 (destination stride).
391 add r2
, r2
, r6
, lsl #
1
392 add r3
, r3
, r7
, lsl #
1
394 bne extra_cp_src_to_dst_height_loop
; NOTE(review): the label end_of_cp_src_to_dst is not defined anywhere in
; this listing.
396 b end_of_cp_src_to_dst
400 ;===========================================================
401 ;vp8cx_pick_filter_level() calls this variant of vp8_yv12_copy_frame_yonly,
402 ;which does not extend the frame borders.
; Y-plane copy without any border extension.
; r0 is the source buffer config and r1 the destination buffer config
; (see the ;srcptr1 / ;dstptr1 loads below).
; NOTE(review): this listing looks incomplete - no register save/restore,
; no setup of r8-r11, no function exit, and no instruction that sets the
; flags tested by the loop-closing bne branches is visible here. Confirm
; against the upstream file.
403 |vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon|
PROC
; r4 = source Y height, r5 = source Y width.
407 ldr r4
, [r0
, #yv12_buffer_config_y_height
]
408 ldr r5
, [r0
, #yv12_buffer_config_y_width
]
; r6 = source Y stride, r7 = destination Y stride.
409 ldr r6
, [r0
, #yv12_buffer_config_y_stride
]
410 ldr r7
, [r1
, #yv12_buffer_config_y_stride
]
; r2 = source Y buffer base, r3 = destination Y buffer base.
411 ldr r2
, [r0
, #yv12_buffer_config_y_buffer
] ;srcptr1
412 ldr r3
, [r1
, #yv12_buffer_config_y_buffer
] ;dstptr1
414 ; copy two rows at one time
417 cp_src_to_dst_height_loop1
424 cp_src_to_dst_width_loop1
; One pass reads 128 bytes from [r8] into q0-q7 and 128 bytes from [r10]
; into q8-q15; each load post-increments its pointer by 32 bytes.
; Presumably r8 and r10 address two adjacent source rows - their setup is
; not visible in this listing (TODO confirm).
425 vld1.8
{q0, q1}, [r8
]!
426 vld1.8
{q8, q9}, [r10
]!
427 vld1.8
{q2, q3}, [r8
]!
428 vld1.8
{q10, q11}, [r10
]!
429 vld1.8
{q4, q5}, [r8
]!
430 vld1.8
{q12, q13}, [r10
]!
431 vld1.8
{q6, q7}, [r8
]!
432 vld1.8
{q14, q15}, [r10
]!
; Write the same 2 x 128 bytes out: q0-q7 to [r9], q8-q15 to [r11],
; again post-incrementing by 32 bytes per store.
436 vst1.8
{q0, q1}, [r9
]!
437 vst1.8
{q8, q9}, [r11
]!
438 vst1.8
{q2, q3}, [r9
]!
439 vst1.8
{q10, q11}, [r11
]!
440 vst1.8
{q4, q5}, [r9
]!
441 vst1.8
{q12, q13}, [r11
]!
442 vst1.8
{q6, q7}, [r9
]!
443 vst1.8
{q14, q15}, [r11
]!
445 bne cp_src_to_dst_width_loop1
; Step both base pointers down two rows:
; r2 += 2 * source stride, r3 += 2 * destination stride.
448 add r2
, r2
, r6
, lsl #
1
449 add r3
, r3
, r7
, lsl #
1
451 bne cp_src_to_dst_height_loop1
; r10 = y_width & 0x7f, i.e. the bytes per row left over after whole
; 128-byte passes. The Z flag set here is what the bne after the two ldr
; instructions tests (ldr does not modify the flags).
453 ands r10
, r5
, #
0x7f ;check to see if extra copy is needed
; Reload the buffer bases, since r2/r3 were advanced by the row loop above.
455 ldr r2
, [r0
, #yv12_buffer_config_y_buffer
] ;srcptr1
456 ldr r3
, [r1
, #yv12_buffer_config_y_buffer
] ;dstptr1
457 bne extra_cp_src_to_dst_width1
; NOTE(review): no instructions are visible after this label, so as listed
; control would fall straight into the remainder-width code below.
458 end_of_cp_src_to_dst1
463 ;=============================
; Remainder-width copy, entered when y_width is not a multiple of 128.
; NOTE(review): the load/store body of both loops is not visible here.
464 extra_cp_src_to_dst_width1
471 extra_cp_src_to_dst_height_loop1
479 extra_cp_src_to_dst_width_loop1
487 bne extra_cp_src_to_dst_width_loop1
; Step both base pointers down two rows, as in the main loop.
491 add r2
, r2
, r6
, lsl #
1
492 add r3
, r3
, r7
, lsl #
1
494 bne extra_cp_src_to_dst_height_loop1
496 b end_of_cp_src_to_dst1