2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
4 ; Use of this source code is governed by a BSD-style license
5 ; that can be found in the LICENSE file in the root of the source
6 ; tree. An additional intellectual property rights grant can be found
7 ; in the file PATENTS. All contributing project authors may
8 ; be found in the AUTHORS file in the root of the source tree.
12 EXPORT |vp8_yv12_extend_frame_borders_neon|
17 INCLUDE asm_com_offsets.asm
19 AREA ||.text||
, CODE
, READONLY
, ALIGN=2
20 ;void vp8_yv12_extend_frame_borders_neon (YV12_BUFFER_CONFIG *ybf);
21 ; we depend on VP8BORDERINPIXELS being 32
23 |vp8_yv12_extend_frame_borders_neon|
PROC
28 ldr r3
, [r0
, #yv12_buffer_config_y_width
] ; plane_width
29 ldr r1
, [r0
, #yv12_buffer_config_y_buffer
] ; src_ptr1
30 ldr r4
, [r0
, #yv12_buffer_config_y_height
] ; plane_height
31 ldr lr
, [r0
, #yv12_buffer_config_y_stride
] ; plane_stride
33 ; Border copy for Y plane
34 ; copy the left and right most columns out
35 add r6
, r1
, r3
; dest_ptr2 = src_ptr2 + 1 (src_ptr1 + plane_width)
36 sub r2
, r6
, #
1 ; src_ptr2 = src_ptr1 + plane_width - 1
37 sub r5
, r1
, #
32 ; dest_ptr1 = src_ptr1 - Border
39 mov r12
, r4
, lsr #
2 ; plane_height / 4
42 vld1.8
{d0[], d1[]}, [r1
], lr
43 vld1.8
{d4[], d5[]}, [r2
], lr
44 vld1.8
{d8[], d9[]}, [r1
], lr
45 vld1.8
{d12[], d13[]}, [r2
], lr
46 vld1.8
{d16[], d17[]}, [r1
], lr
47 vld1.8
{d20[], d21[]}, [r2
], lr
48 vld1.8
{d24[], d25[]}, [r1
], lr
49 vld1.8
{d28[], d29[]}, [r2
], lr
62 vst1.8
{q0, q1}, [r5
], lr
63 vst1.8
{q2, q3}, [r6
], lr
64 vst1.8
{q4, q5}, [r5
], lr
65 vst1.8
{q6, q7}, [r6
], lr
66 vst1.8
{q8, q9}, [r5
], lr
67 vst1.8
{q10, q11}, [r6
], lr
68 vst1.8
{q12, q13}, [r5
], lr
69 vst1.8
{q14, q15}, [r6
], lr
73 ;Now copy the top and bottom source lines into each line of the respective borders
74 ldr r1
, [r0
, #yv12_buffer_config_y_buffer
] ; y_buffer
75 mul r8
, r4
, lr
; plane_height * plane_stride
77 ; copy width is plane_stride
78 mov r12
, lr
, lsr #
7 ; plane_stride / 128
80 sub r1
, r1
, #
32 ; src_ptr1 = y_buffer - Border
81 add r6
, r1
, r8
; dest_ptr2 = src_ptr2 + plane_stride (src_ptr1 + (plane_height * plane_stride))
82 sub r2
, r6
, lr
; src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride
83 sub r5
, r1
, lr
, asl #
5 ; dest_ptr1 = src_ptr1 - (Border * plane_stride)
86 vld1.8
{q0, q1}, [r1
]!
87 vld1.8
{q8, q9}, [r2
]!
88 vld1.8
{q2, q3}, [r1
]!
89 vld1.8
{q10, q11}, [r2
]!
90 vld1.8
{q4, q5}, [r1
]!
91 vld1.8
{q12, q13}, [r2
]!
92 vld1.8
{q6, q7}, [r1
]!
93 vld1.8
{q14, q15}, [r2
]!
100 vst1.8
{q0, q1}, [r5
]!
101 vst1.8
{q8, q9}, [r6
]!
102 vst1.8
{q2, q3}, [r5
]!
103 vst1.8
{q10, q11}, [r6
]!
104 vst1.8
{q4, q5}, [r5
]!
105 vst1.8
{q12, q13}, [r6
]!
106 vst1.8
{q6, q7}, [r5
]!
107 vst1.8
{q14, q15}, [r6
]!
109 add r5
, r5
, lr
; dest_ptr1 += plane_stride
110 sub r5
, r5
, #
128 ; dest_ptr1 -= 128
111 add r6
, r6
, lr
; dest_ptr2 += plane_stride
112 sub r6
, r6
, #
128 ; dest_ptr2 -= 128
116 sub r5
, r1
, lr
, asl #
5 ; src_ptr1 - (Border* plane_stride)
117 add r6
, r2
, lr
; src_ptr2 + plane_stride
120 bne copy_top_bottom_y
122 mov r7
, lr
, lsr #
4 ; check to see if extra copy is needed
124 bne extra_top_bottom_y
127 ;Border copy for U, V planes
129 ldr r7
, [r0
, #yv12_buffer_config_u_buffer
] ; src_ptr1
130 ldr lr
, [r0
, #yv12_buffer_config_uv_stride
] ; plane_stride
131 ldr r3
, [r0
, #yv12_buffer_config_uv_width
] ; plane_width
132 ldr r4
, [r0
, #yv12_buffer_config_uv_height
] ; plane_height
136 ;copy the left and right most columns out
138 mov r1
, r7
; src_ptr1 needs to be saved for second half of loop
139 sub r5
, r1
, #
16 ; dest_ptr1 = src_ptr1 - Border
140 add r6
, r1
, r3
; dest_ptr2 = src_ptr2 + 1 (src_ptr1 + plane_width)
141 sub r2
, r6
, #
1 ; src_ptr2 = src_ptr1 + plane_width - 1
143 mov r12
, r4
, lsr #
3 ; plane_height / 8
146 vld1.8
{d0[], d1[]}, [r1
], lr
147 vld1.8
{d2[], d3[]}, [r2
], lr
148 vld1.8
{d4[], d5[]}, [r1
], lr
149 vld1.8
{d6[], d7[]}, [r2
], lr
150 vld1.8
{d8[], d9[]}, [r1
], lr
151 vld1.8
{d10[], d11[]}, [r2
], lr
152 vld1.8
{d12[], d13[]}, [r1
], lr
153 vld1.8
{d14[], d15[]}, [r2
], lr
154 vld1.8
{d16[], d17[]}, [r1
], lr
155 vld1.8
{d18[], d19[]}, [r2
], lr
156 vld1.8
{d20[], d21[]}, [r1
], lr
157 vld1.8
{d22[], d23[]}, [r2
], lr
158 vld1.8
{d24[], d25[]}, [r1
], lr
159 vld1.8
{d26[], d27[]}, [r2
], lr
160 vld1.8
{d28[], d29[]}, [r1
], lr
161 vld1.8
{d30[], d31[]}, [r2
], lr
165 vst1.8
{q0}, [r5
], lr
166 vst1.8
{q1}, [r6
], lr
167 vst1.8
{q2}, [r5
], lr
168 vst1.8
{q3}, [r6
], lr
169 vst1.8
{q4}, [r5
], lr
170 vst1.8
{q5}, [r6
], lr
171 vst1.8
{q6}, [r5
], lr
172 vst1.8
{q7}, [r6
], lr
173 vst1.8
{q8}, [r5
], lr
174 vst1.8
{q9}, [r6
], lr
175 vst1.8
{q10}, [r5
], lr
176 vst1.8
{q11}, [r6
], lr
177 vst1.8
{q12}, [r5
], lr
178 vst1.8
{q13}, [r6
], lr
179 vst1.8
{q14}, [r5
], lr
180 vst1.8
{q15}, [r6
], lr
182 bne copy_left_right_uv
184 ;Now copy the top and bottom source lines into each line of the respective borders
186 mul r8
, r4
, lr
; plane_height * plane_stride
187 mov r12
, lr
, lsr #
6 ; plane_stride / 64
189 sub r1
, r1
, #
16 ; src_ptr1 = u_buffer - Border
190 add r6
, r1
, r8
; dest_ptr2 = src_ptr2 + plane_stride (src_ptr1 + (plane_height * plane_stride))
191 sub r2
, r6
, lr
; src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride
192 sub r5
, r1
, lr
, asl #
4 ; dest_ptr1 = src_ptr1 - (Border * plane_stride)
195 vld1.8
{q0, q1}, [r1
]!
196 vld1.8
{q8, q9}, [r2
]!
197 vld1.8
{q2, q3}, [r1
]!
198 vld1.8
{q10, q11}, [r2
]!
205 vst1.8
{q0, q1}, [r5
]!
206 vst1.8
{q8, q9}, [r6
]!
207 vst1.8
{q2, q3}, [r5
]!
208 vst1.8
{q10, q11}, [r6
]!
210 add r5
, r5
, lr
; dest_ptr1 += plane_stride
212 add r6
, r6
, lr
; dest_ptr2 += plane_stride
217 sub r5
, r1
, lr
, asl #
4 ; dest_ptr1 = src_ptr1 - (Border * plane_stride)
218 add r6
, r2
, lr
; dest_ptr2 = src_ptr2 + plane_stride
221 bne copy_top_bottom_uv
223 mov r7
, lr
, lsr #
3 ; check to see if extra copy is needed
225 bne extra_top_bottom_uv
227 end_of_border_copy_uv
229 ldrne r7
, [r0
, #yv12_buffer_config_v_buffer
] ; src_ptr1
235 ;;;;;;;;;;;;;;;;;;;;;;
245 vst1.8
{q0}, [r5
], lr
246 vst1.8
{q2}, [r6
], lr
247 vst1.8
{q0}, [r5
], lr
248 vst1.8
{q2}, [r6
], lr
249 vst1.8
{q0}, [r5
], lr
250 vst1.8
{q2}, [r6
], lr
251 vst1.8
{q0}, [r5
], lr
252 vst1.8
{q2}, [r6
], lr
253 vst1.8
{q0}, [r5
], lr
254 vst1.8
{q2}, [r6
], lr
255 vst1.8
{q0}, [r5
], lr
256 vst1.8
{q2}, [r6
], lr
257 vst1.8
{q0}, [r5
], lr
258 vst1.8
{q2}, [r6
], lr
259 vst1.8
{q0}, [r5
], lr
260 vst1.8
{q2}, [r6
], lr
261 bne extra_top_bottom_32
263 sub r5
, r1
, lr
, asl #
5 ; src_ptr1 - (Border * plane_stride)
264 add r6
, r2
, lr
; src_ptr2 + plane_stride
266 bne extra_top_bottom_y
268 b end_of_border_copy_y
279 vst1.8
{d0}, [r5
], lr
280 vst1.8
{d8}, [r6
], lr
281 vst1.8
{d0}, [r5
], lr
282 vst1.8
{d8}, [r6
], lr
283 vst1.8
{d0}, [r5
], lr
284 vst1.8
{d8}, [r6
], lr
285 vst1.8
{d0}, [r5
], lr
286 vst1.8
{d8}, [r6
], lr
287 vst1.8
{d0}, [r5
], lr
288 vst1.8
{d8}, [r6
], lr
289 vst1.8
{d0}, [r5
], lr
290 vst1.8
{d8}, [r6
], lr
291 vst1.8
{d0}, [r5
], lr
292 vst1.8
{d8}, [r6
], lr
293 vst1.8
{d0}, [r5
], lr
294 vst1.8
{d8}, [r6
], lr
295 bne extra_top_bottom_16
297 sub r5
, r1
, lr
, asl #
4 ; src_ptr1 - (Border * plane_stride)
298 add r6
, r2
, lr
; src_ptr2 + plane_stride
300 bne extra_top_bottom_uv
302 b end_of_border_copy_uv