; vp8/encoder/arm/neon/sad8_neon.asm  (libvpx, initial WebM release)
; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license and patent
; grant that can be found in the LICENSE file in the root of the source
; tree. All contributing project authors may be found in the AUTHORS
; file in the root of the source tree.
    EXPORT  |vp8_sad8x8_neon|
    EXPORT  |vp8_sad8x16_neon|
    EXPORT  |vp8_sad4x4_neon|
    ARM
    REQUIRE8
    PRESERVE8

    AREA ||.text||, CODE, READONLY, ALIGN=2
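
; REQUIRE8/PRESERVE8 above mark these routines as requiring and preserving
; 8-byte stack alignment; none of the procedures below touch the stack.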

; unsigned int vp8_sad8x8_c(
;     unsigned char *src_ptr,
;     int src_stride,
;     unsigned char *ref_ptr,
;     int ref_stride)
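;
; A rough C equivalent of what the code below computes (a reference sketch,
; not part of the original source):
;
;     unsigned int sad = 0;
;     for (int i = 0; i < 8; i++)
;     {
;         for (int j = 0; j < 8; j++)
;             sad += abs(src_ptr[j] - ref_ptr[j]);
;         src_ptr += src_stride;
;         ref_ptr += ref_stride;
;     }
;     return sad;
;
; The assembly unrolls all eight rows: vabdl.u8 starts the widened 16-bit
; running sums in q12 from the first row, and each vabal.u8 accumulates one
; more row of absolute differences.
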
|vp8_sad8x8_neon| PROC
    vld1.8          {d0}, [r0], r1
    vld1.8          {d8}, [r2], r3

    vld1.8          {d2}, [r0], r1
    vld1.8          {d10}, [r2], r3

    vabdl.u8        q12, d0, d8

    vld1.8          {d4}, [r0], r1
    vld1.8          {d12}, [r2], r3

    vabal.u8        q12, d2, d10

    vld1.8          {d6}, [r0], r1
    vld1.8          {d14}, [r2], r3

    vabal.u8        q12, d4, d12

    vld1.8          {d0}, [r0], r1
    vld1.8          {d8}, [r2], r3

    vabal.u8        q12, d6, d14

    vld1.8          {d2}, [r0], r1
    vld1.8          {d10}, [r2], r3

    vabal.u8        q12, d0, d8

    vld1.8          {d4}, [r0], r1
    vld1.8          {d12}, [r2], r3

    vabal.u8        q12, d2, d10

    vld1.8          {d6}, [r0], r1
    vld1.8          {d14}, [r2], r3

    vabal.u8        q12, d4, d12
    vabal.u8        q12, d6, d14
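
    ; Reduce the eight 16-bit column sums in q12 to a single total:
    ; pairwise-widen to 32 bits, then to 64 bits, add the two halves,
    ; and return the low word in r0.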
    vpaddl.u16      q1, q12
    vpaddl.u32      q0, q1
    vadd.u32        d0, d0, d1

    vmov.32         r0, d0[0]

    bx              lr

    ENDP
;============================
;
; unsigned int vp8_sad8x16_c(
;     unsigned char *src_ptr,
;     int src_stride,
;     unsigned char *ref_ptr,
;     int ref_stride)
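;
; Same scheme as vp8_sad8x8_neon above, fully unrolled over sixteen rows
; instead of eight.
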
|vp8_sad8x16_neon| PROC
    vld1.8          {d0}, [r0], r1
    vld1.8          {d8}, [r2], r3

    vld1.8          {d2}, [r0], r1
    vld1.8          {d10}, [r2], r3

    vabdl.u8        q12, d0, d8

    vld1.8          {d4}, [r0], r1
    vld1.8          {d12}, [r2], r3

    vabal.u8        q12, d2, d10

    vld1.8          {d6}, [r0], r1
    vld1.8          {d14}, [r2], r3

    vabal.u8        q12, d4, d12

    vld1.8          {d0}, [r0], r1
    vld1.8          {d8}, [r2], r3

    vabal.u8        q12, d6, d14

    vld1.8          {d2}, [r0], r1
    vld1.8          {d10}, [r2], r3

    vabal.u8        q12, d0, d8

    vld1.8          {d4}, [r0], r1
    vld1.8          {d12}, [r2], r3

    vabal.u8        q12, d2, d10

    vld1.8          {d6}, [r0], r1
    vld1.8          {d14}, [r2], r3

    vabal.u8        q12, d4, d12

    vld1.8          {d0}, [r0], r1
    vld1.8          {d8}, [r2], r3

    vabal.u8        q12, d6, d14

    vld1.8          {d2}, [r0], r1
    vld1.8          {d10}, [r2], r3

    vabal.u8        q12, d0, d8

    vld1.8          {d4}, [r0], r1
    vld1.8          {d12}, [r2], r3

    vabal.u8        q12, d2, d10

    vld1.8          {d6}, [r0], r1
    vld1.8          {d14}, [r2], r3

    vabal.u8        q12, d4, d12

    vld1.8          {d0}, [r0], r1
    vld1.8          {d8}, [r2], r3

    vabal.u8        q12, d6, d14

    vld1.8          {d2}, [r0], r1
    vld1.8          {d10}, [r2], r3

    vabal.u8        q12, d0, d8

    vld1.8          {d4}, [r0], r1
    vld1.8          {d12}, [r2], r3

    vabal.u8        q12, d2, d10

    vld1.8          {d6}, [r0], r1
    vld1.8          {d14}, [r2], r3

    vabal.u8        q12, d4, d12
    vabal.u8        q12, d6, d14
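
    ; Same horizontal reduction as in vp8_sad8x8_neon above.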
    vpaddl.u16      q1, q12
    vpaddl.u32      q0, q1
    vadd.u32        d0, d0, d1

    vmov.32         r0, d0[0]

    bx              lr

    ENDP
;===========================
;
; unsigned int vp8_sad4x4_c(
;     unsigned char *src_ptr,
;     int src_stride,
;     unsigned char *ref_ptr,
;     int ref_stride)
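;
; The block is only 4 pixels wide, but each vld1.8 below still loads 8
; bytes per row; the extra differences accumulate into the high half of
; q12 (d25) and are discarded by the reduction at the end.
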
|vp8_sad4x4_neon| PROC
    vld1.8          {d0}, [r0], r1
    vld1.8          {d8}, [r2], r3

    vld1.8          {d2}, [r0], r1
    vld1.8          {d10}, [r2], r3

    vabdl.u8        q12, d0, d8

    vld1.8          {d4}, [r0], r1
    vld1.8          {d12}, [r2], r3

    vabal.u8        q12, d2, d10

    vld1.8          {d6}, [r0], r1
    vld1.8          {d14}, [r2], r3

    vabal.u8        q12, d4, d12
    vabal.u8        q12, d6, d14
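
    ; Only d24, the low half of q12 holding the four in-block column sums,
    ; is reduced; d25 (the out-of-block columns) is dropped.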
    vpaddl.u16      d1, d24
    vpaddl.u32      d0, d1

    vmov.32         r0, d0[0]

    bx              lr

    ENDP