Merge "Skip computation of distortion in vp8_pick_inter_mode if active_map is used"
[libvpx.git] / vp8 / common / arm / armv6 / copymem8x8_v6.asm
blobc6a60c610bb8deed1b1e38428dc6fc945cf8049b
2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
4 ; Use of this source code is governed by a BSD-style license
5 ; that can be found in the LICENSE file in the root of the source
6 ; tree. An additional intellectual property rights grant can be found
7 ; in the file PATENTS. All contributing project authors may
8 ; be found in the AUTHORS file in the root of the source tree.
12 EXPORT |vp8_copy_mem8x8_v6|
13 ; ARM
14 ; REQUIRE8
15 ; PRESERVE8
17 AREA Block, CODE, READONLY ; name this block of code
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
; void vp8_copy_mem8x8_v6(unsigned char *src, int src_stride,
;                         unsigned char *dst, int dst_stride)
;
; Copies 8 rows of 8 bytes each from src to dst.
;
; In:     r0 = src          r1 = src_stride (bytes between rows)
;         r2 = dst          r3 = dst_stride (bytes between rows)
; Out:    nothing
; Uses:   r4-r5 (pushed on entry, popped on exit), r12 (row counter), flags
; Stack:  8 bytes
;
; One of three row loops is chosen once, at entry:
;   copy_mem8x8_fast   - LDM/STM of two words, everything 8-byte aligned
;   copy_mem8x8_4      - LDR/STR word pairs,   everything 4-byte aligned
;   copy_mem8x8_1_loop - byte by byte,         any alignment
; "Everything" is src, dst and both strides: the same loop runs for all
; 8 rows, so each row address (base + n * stride) on both the load and
; the store side must satisfy the alignment the loop's instructions need.
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
|vp8_copy_mem8x8_v6| PROC
    stmdb       sp!, {r4-r5}

    ;preload the first three source rows
    pld         [r0]
    pld         [r0, r1]
    pld         [r0, r1, lsl #1]

    ; r12 = src | dst | src_stride | dst_stride. Its low bits are zero
    ; only if the low bits of all four are zero, so one test covers the
    ; load and store address of every row.
    orr         r12, r0, r2
    orr         r12, r12, r1
    orr         r12, r12, r3

    tst         r12, #7
    beq         copy_mem8x8_fast

    tst         r12, #3
    beq         copy_mem8x8_4

    ;copy 1 byte each time
    ; r4/r5 are loaded ahead of the loop; each iteration stores the pair
    ; it already holds while fetching the next pair.
    ldrb        r4, [r0]
    ldrb        r5, [r0, #1]

    mov         r12, #8                 ; rows remaining

copy_mem8x8_1_loop
    strb        r4, [r2]
    strb        r5, [r2, #1]

    ldrb        r4, [r0, #2]
    ldrb        r5, [r0, #3]

    subs        r12, r12, #1            ; flags stay live until the bne below
                                        ; (nothing in between sets flags)
    strb        r4, [r2, #2]
    strb        r5, [r2, #3]

    ldrb        r4, [r0, #4]
    ldrb        r5, [r0, #5]

    strb        r4, [r2, #4]
    strb        r5, [r2, #5]

    ldrb        r4, [r0, #6]
    ldrb        r5, [r0, #7]

    add         r0, r0, r1              ; src += src_stride

    strb        r4, [r2, #6]
    strb        r5, [r2, #7]

    add         r2, r2, r3              ; dst += dst_stride

    ; fetch the first two bytes of the next row only if there is one,
    ; so nothing is read beyond the 8th row
    ldrneb      r4, [r0]
    ldrneb      r5, [r0, #1]

    bne         copy_mem8x8_1_loop

    ldmia       sp!, {r4-r5}
    bx          lr                      ; bx, not mov pc: also returns
                                        ; correctly to a Thumb-state caller

;copy 4 bytes each time
copy_mem8x8_4
    ldr         r4, [r0]
    ldr         r5, [r0, #4]

    mov         r12, #8                 ; rows remaining

copy_mem8x8_4_loop
    subs        r12, r12, #1
    add         r0, r0, r1              ; src += src_stride

    str         r4, [r2]
    str         r5, [r2, #4]

    add         r2, r2, r3              ; dst += dst_stride

    ; load the next row only if one remains
    ldrne       r4, [r0]
    ldrne       r5, [r0, #4]

    bne         copy_mem8x8_4_loop

    ldmia       sp!, {r4-r5}
    bx          lr

;copy 8 bytes each time
copy_mem8x8_fast
    mov         r12, #8                 ; rows remaining

copy_mem8x8_fast_loop
    ldmia       r0, {r4-r5}             ; no writeback: the stride add
    add         r0, r0, r1              ; advances src to the next row

    subs        r12, r12, #1
    stmia       r2, {r4-r5}
    add         r2, r2, r3              ; dst += dst_stride

    bne         copy_mem8x8_fast_loop

    ldmia       sp!, {r4-r5}
    bx          lr

    ENDP  ; |vp8_copy_mem8x8_v6|