Merge "Skip computation of distortion in vp8_pick_inter_mode if active_map is used"
[libvpx.git] / vp8 / common / arm / armv6 / copymem16x16_v6.asm
blobabf048c2fa88b4b4bee0c9e6288769b9072ccd07
2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
4 ; Use of this source code is governed by a BSD-style license
5 ; that can be found in the LICENSE file in the root of the source
6 ; tree. An additional intellectual property rights grant can be found
7 ; in the file PATENTS. All contributing project authors may
8 ; be found in the AUTHORS file in the root of the source tree.
12 EXPORT |vp8_copy_mem16x16_v6|
13 ; ARM
14 ; REQUIRE8
15 ; PRESERVE8
17 AREA Block, CODE, READONLY ; name this block of code
18 ;void copy_mem16x16_v6( unsigned char *src, int src_stride, unsigned char *dst, int dst_stride)
19 ;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
20 |vp8_copy_mem16x16_v6| PROC
21 stmdb sp!, {r4 - r7}
22 ;push {r4-r7}
24 ;preload
25 pld [r0, #31] ; preload for next 16x16 block
27 ands r4, r0, #15
28 beq copy_mem16x16_fast
30 ands r4, r0, #7
31 beq copy_mem16x16_8
33 ands r4, r0, #3
34 beq copy_mem16x16_4
36 ;copy one byte each time
37 ldrb r4, [r0]
38 ldrb r5, [r0, #1]
39 ldrb r6, [r0, #2]
40 ldrb r7, [r0, #3]
42 mov r12, #16
44 copy_mem16x16_1_loop
45 strb r4, [r2]
46 strb r5, [r2, #1]
47 strb r6, [r2, #2]
48 strb r7, [r2, #3]
50 ldrb r4, [r0, #4]
51 ldrb r5, [r0, #5]
52 ldrb r6, [r0, #6]
53 ldrb r7, [r0, #7]
55 subs r12, r12, #1
57 strb r4, [r2, #4]
58 strb r5, [r2, #5]
59 strb r6, [r2, #6]
60 strb r7, [r2, #7]
62 ldrb r4, [r0, #8]
63 ldrb r5, [r0, #9]
64 ldrb r6, [r0, #10]
65 ldrb r7, [r0, #11]
67 strb r4, [r2, #8]
68 strb r5, [r2, #9]
69 strb r6, [r2, #10]
70 strb r7, [r2, #11]
72 ldrb r4, [r0, #12]
73 ldrb r5, [r0, #13]
74 ldrb r6, [r0, #14]
75 ldrb r7, [r0, #15]
77 add r0, r0, r1
79 strb r4, [r2, #12]
80 strb r5, [r2, #13]
81 strb r6, [r2, #14]
82 strb r7, [r2, #15]
84 add r2, r2, r3
86 ldrneb r4, [r0]
87 ldrneb r5, [r0, #1]
88 ldrneb r6, [r0, #2]
89 ldrneb r7, [r0, #3]
91 pld [r0, #31] ; preload for next 16x16 block
93 bne copy_mem16x16_1_loop
95 ldmia sp!, {r4 - r7}
96 ;pop {r4-r7}
97 mov pc, lr
99 ;copy 4 bytes each time
100 copy_mem16x16_4
101 ldr r4, [r0]
102 ldr r5, [r0, #4]
103 ldr r6, [r0, #8]
104 ldr r7, [r0, #12]
106 mov r12, #16
108 copy_mem16x16_4_loop
109 subs r12, r12, #1
110 add r0, r0, r1
112 str r4, [r2]
113 str r5, [r2, #4]
114 str r6, [r2, #8]
115 str r7, [r2, #12]
117 add r2, r2, r3
119 ldrne r4, [r0]
120 ldrne r5, [r0, #4]
121 ldrne r6, [r0, #8]
122 ldrne r7, [r0, #12]
124 pld [r0, #31] ; preload for next 16x16 block
126 bne copy_mem16x16_4_loop
128 ldmia sp!, {r4 - r7}
129 ;pop {r4-r7}
130 mov pc, lr
132 ;copy 8 bytes each time
133 copy_mem16x16_8
134 sub r1, r1, #16
135 sub r3, r3, #16
137 mov r12, #16
139 copy_mem16x16_8_loop
140 ldmia r0!, {r4-r5}
141 ;ldm r0, {r4-r5}
142 ldmia r0!, {r6-r7}
144 add r0, r0, r1
146 stmia r2!, {r4-r5}
147 subs r12, r12, #1
148 ;stm r2, {r4-r5}
149 stmia r2!, {r6-r7}
151 add r2, r2, r3
153 pld [r0, #31] ; preload for next 16x16 block
154 bne copy_mem16x16_8_loop
156 ldmia sp!, {r4 - r7}
157 ;pop {r4-r7}
158 mov pc, lr
160 ;copy 16 bytes each time
161 copy_mem16x16_fast
162 ;sub r1, r1, #16
163 ;sub r3, r3, #16
165 mov r12, #16
167 copy_mem16x16_fast_loop
168 ldmia r0, {r4-r7}
169 ;ldm r0, {r4-r7}
170 add r0, r0, r1
172 subs r12, r12, #1
173 stmia r2, {r4-r7}
174 ;stm r2, {r4-r7}
175 add r2, r2, r3
177 pld [r0, #31] ; preload for next 16x16 block
178 bne copy_mem16x16_fast_loop
180 ldmia sp!, {r4 - r7}
181 ;pop {r4-r7}
182 mov pc, lr
184 ENDP ; |vp8_copy_mem16x16_v6|