Initial WebM release
[libvpx.git] / vp8 / encoder / arm / neon / vp8_memcpy_neon.asm
blobf26b4d7ae6aa4afd08601f46fa4fddfd7e2cf1f3
2 ; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
4 ; Use of this source code is governed by a BSD-style license and patent
5 ; grant that can be found in the LICENSE file in the root of the source
6 ; tree. All contributing project authors may be found in the AUTHORS
7 ; file in the root of the source tree.
; Make the routine's symbol visible to the linker so C code can call it.
11 EXPORT |vp8_memcpy_neon|
; Assemble the following code as ARM (not Thumb) instructions.
13 ARM
; Build attributes: this file requires, and itself preserves, 8-byte
; alignment of the stack.
14 REQUIRE8
15 PRESERVE8
; Place the code in a read-only code section aligned to 2^2 = 4 bytes.
17 AREA ||.text||, CODE, READONLY, ALIGN=2
;=========================================
; void vp8_memcpy_neon(unsigned char *dst_ptr, unsigned char *src_ptr, int sz);
;
; Copies sz bytes from src_ptr to dst_ptr using NEON loads/stores.
;
; ABI:   AAPCS, ARM state
; In:    r0 = dst_ptr
;        r1 = src_ptr
;        r2 = sz (signed byte count; sz <= 0 copies nothing)
; Out:   none
; Clobb: r0, r1, r3, r12, q0-q3, q8-q15, flags
;        q4-q7 (d8-d15) are callee-saved under AAPCS and are deliberately
;        not used, so nothing has to be spilled to the stack.
;
; The copy runs in three phases:
;   1. whole 256-byte blocks          (sz >> 8 iterations)
;   2. whole 16-byte chunks of the remainder
;   3. the last 0..15 bytes, one byte at a time
; Each loop is skipped when its count is zero, so no size can make the
; routine read or write beyond sz bytes.
;
; The pld prefetch hints of the original code stay disabled; the original
; authors noted they still need tuning on a real device.
;=========================================
|vp8_memcpy_neon| PROC
    cmp             r2, #0
    ble             done_copy_neon_loop             ;nothing to copy for sz <= 0

    movs            r12, r2, lsr #8                 ;r12 = number of 256-byte blocks
    beq             extra_copy_neon                 ;sz < 256: skip the block loop

    ;Invariant: r12 = blocks still to copy, always > 0 on entry.
    ;q0-q3 are reloaded mid-block (only after their previous contents
    ;have been stored) instead of using the callee-saved q4-q7.
memcpy_neon_loop
    vld1.8          {q0, q1}, [r1]!                 ;load src data
    subs            r12, r12, #1                    ;flags survive the NEON ops below
    vld1.8          {q2, q3}, [r1]!
    vst1.8          {q0, q1}, [r0]!                 ;copy to dst_ptr
    vld1.8          {q0, q1}, [r1]!                 ;q0/q1 already stored: safe to reuse
    vst1.8          {q2, q3}, [r0]!
    vld1.8          {q2, q3}, [r1]!                 ;q2/q3 already stored: safe to reuse
    vst1.8          {q0, q1}, [r0]!
    vld1.8          {q8, q9}, [r1]!
    vst1.8          {q2, q3}, [r0]!
    vld1.8          {q10, q11}, [r1]!
    vst1.8          {q8, q9}, [r0]!
    vld1.8          {q12, q13}, [r1]!
    vst1.8          {q10, q11}, [r0]!
    vld1.8          {q14, q15}, [r1]!
    vst1.8          {q12, q13}, [r0]!
    vst1.8          {q14, q15}, [r0]!

    bne             memcpy_neon_loop

extra_copy_neon
    ands            r3, r2, #0xff                   ;r3 = bytes left after the 256-byte blocks
    beq             done_copy_neon_loop

    movs            r12, r3, lsr #4                 ;r12 = number of whole 16-byte chunks
    beq             extra_copy_bytes                ;fewer than 16 bytes left

extra_copy_neon_loop
    vld1.8          {q0}, [r1]!                     ;load src data
    subs            r12, r12, #1
    vst1.8          {q0}, [r0]!
    bne             extra_copy_neon_loop

extra_copy_bytes
    ands            r3, r3, #0xf                    ;r3 = final 0..15 bytes
    beq             done_copy_neon_loop

extra_copy_byte_loop
    ldrb            r12, [r1], #1
    subs            r3, r3, #1
    strb            r12, [r0], #1
    bne             extra_copy_byte_loop

done_copy_neon_loop
    bx              lr
    ENDP
67 END