2 ; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
4 ; Use of this source code is governed by a BSD-style license and patent
5 ; grant that can be found in the LICENSE file in the root of the source
6 ; tree. All contributing project authors may be found in the AUTHORS
7 ; file in the root of the source tree.
; Give the vp8_memcpy_neon symbol global visibility so that other object
; files can resolve references to it at link time.
11 EXPORT |vp8_memcpy_neon|
; Start the ||.text|| area that holds the routine below. Attributes:
;   CODE     - the area contains machine instructions
;   READONLY - the area must not be written to
;   ALIGN=2  - the area is aligned on a 2^2 = 4-byte boundary
17 AREA ||.text||
, CODE
, READONLY
, ALIGN=2
18 ;=========================================
19 ;void vp8_memcpy_neon(unsigned char *dst_ptr, unsigned char *src_ptr, int sz);
20 |vp8_memcpy_neon|
PROC
21 ;pld [r1] ;preload pred data
26 mov r12
, r2
, lsr #
8 ;copy 256 bytes data at one time
29 vld1.8
{q0, q1}, [r1
]! ;load src data
31 vld1.8
{q2, q3}, [r1
]!
32 vst1.8
{q0, q1}, [r0
]! ;copy to dst_ptr
33 vld1.8
{q4, q5}, [r1
]!
34 vst1.8
{q2, q3}, [r0
]!
35 vld1.8
{q6, q7}, [r1
]!
36 vst1.8
{q4, q5}, [r0
]!
37 vld1.8
{q8, q9}, [r1
]!
38 vst1.8
{q6, q7}, [r0
]!
39 vld1.8
{q10, q11}, [r1
]!
40 vst1.8
{q8, q9}, [r0
]!
41 vld1.8
{q12, q13}, [r1
]!
42 vst1.8
{q10, q11}, [r0
]!
43 vld1.8
{q14, q15}, [r1
]!
44 vst1.8
{q12, q13}, [r0
]!
45 vst1.8
{q14, q15}, [r0
]!
47 ;pld [r1] ;preload pred data -- need to adjust for real device
54 ands r3
, r2
, #
0xff ;extra copy
55 beq done_copy_neon_loop
58 vld1.8
{q0}, [r1
]! ;load src data
61 bne extra_copy_neon_loop