Set cpu_used range to [-16, 16] in real-time mode
[libvpx.git] / vp8 / encoder / arm / neon / sad8_neon.asm
blob23ba6df93a4dd8856e96396b0372cb646329d264
2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
4 ; Use of this source code is governed by a BSD-style license
5 ; that can be found in the LICENSE file in the root of the source
6 ; tree. An additional intellectual property rights grant can be found
7 ; in the file PATENTS. All contributing project authors may
8 ; be found in the AUTHORS file in the root of the source tree.
12 EXPORT |vp8_sad8x8_neon|
13 EXPORT |vp8_sad8x16_neon|
14 EXPORT |vp8_sad4x4_neon|
16 ARM
17 REQUIRE8
18 PRESERVE8
20 AREA ||.text||, CODE, READONLY, ALIGN=2
;-----------------------------------------------------------------------
; unsigned int vp8_sad8x8_neon(
;     unsigned char *src_ptr,
;     int  src_stride,
;     unsigned char *ref_ptr,
;     int  ref_stride)
;
; Sum of absolute differences over an 8-column by 8-row block of bytes.
;
; In:    r0 = src_ptr, r1 = src_stride, r2 = ref_ptr, r3 = ref_stride
; Out:   r0 = SAD
; Clobb: r0, r2 (post-incremented row pointers), d0-d7, d16, d18, d20,
;        d22, q12
;
; Only caller-saved NEON registers are used. d8-d15 are callee-saved
; under the AAPCS, so the reference rows are kept in d16-d22 and no
; save/restore sequence is needed.
;-----------------------------------------------------------------------
|vp8_sad8x8_neon| PROC
    ; rows 0-3: loads are interleaved with the accumulates
    vld1.8          {d0}, [r0], r1          ; src row 0
    vld1.8          {d16}, [r2], r3         ; ref row 0

    vld1.8          {d2}, [r0], r1          ; src row 1
    vld1.8          {d18}, [r2], r3         ; ref row 1

    vabdl.u8        q12, d0, d16            ; q12 = |src - ref|, widened to 8 x u16

    vld1.8          {d4}, [r0], r1          ; src row 2
    vld1.8          {d20}, [r2], r3         ; ref row 2

    vabal.u8        q12, d2, d18            ; q12 += |src - ref|

    vld1.8          {d6}, [r0], r1          ; src row 3
    vld1.8          {d22}, [r2], r3         ; ref row 3

    vabal.u8        q12, d4, d20

    ; rows 4-7: the same four register pairs are reused
    vld1.8          {d0}, [r0], r1
    vld1.8          {d16}, [r2], r3

    vabal.u8        q12, d6, d22

    vld1.8          {d2}, [r0], r1
    vld1.8          {d18}, [r2], r3

    vabal.u8        q12, d0, d16

    vld1.8          {d4}, [r0], r1
    vld1.8          {d20}, [r2], r3

    vabal.u8        q12, d2, d18

    vld1.8          {d6}, [r0], r1
    vld1.8          {d22}, [r2], r3

    vabal.u8        q12, d4, d20
    vabal.u8        q12, d6, d22

    ; horizontal reduction of the eight u16 column sums
    vpaddl.u16      q1, q12                 ; 8 x u16 -> 4 x u32
    vpaddl.u32      q0, q1                  ; 4 x u32 -> 2 x u64
    vadd.u32        d0, d0, d1              ; low word of d0 = total

    vmov.32         r0, d0[0]               ; return value

    bx              lr

    ENDP
;============================
;-----------------------------------------------------------------------
; unsigned int vp8_sad8x16_neon(
;     unsigned char *src_ptr,
;     int  src_stride,
;     unsigned char *ref_ptr,
;     int  ref_stride)
;
; Sum of absolute differences over an 8-column by 16-row block of bytes.
;
; In:    r0 = src_ptr, r1 = src_stride, r2 = ref_ptr, r3 = ref_stride
; Out:   r0 = SAD
; Clobb: r0, r2 (post-incremented row pointers), d0-d7, d16, d18, d20,
;        d22, q12
;
; Only caller-saved NEON registers are used. d8-d15 are callee-saved
; under the AAPCS, so the reference rows are kept in d16-d22 and no
; save/restore sequence is needed.
;
; Each u16 lane of q12 accumulates 16 byte differences (at most
; 16 * 255 = 4080), so the accumulator cannot overflow.
;-----------------------------------------------------------------------
|vp8_sad8x16_neon| PROC
    ; rows 0-3: loads are interleaved with the accumulates
    vld1.8          {d0}, [r0], r1          ; src row 0
    vld1.8          {d16}, [r2], r3         ; ref row 0

    vld1.8          {d2}, [r0], r1          ; src row 1
    vld1.8          {d18}, [r2], r3         ; ref row 1

    vabdl.u8        q12, d0, d16            ; q12 = |src - ref|, widened to 8 x u16

    vld1.8          {d4}, [r0], r1          ; src row 2
    vld1.8          {d20}, [r2], r3         ; ref row 2

    vabal.u8        q12, d2, d18            ; q12 += |src - ref|

    vld1.8          {d6}, [r0], r1          ; src row 3
    vld1.8          {d22}, [r2], r3         ; ref row 3

    vabal.u8        q12, d4, d20

    ; rows 4-7
    vld1.8          {d0}, [r0], r1
    vld1.8          {d16}, [r2], r3

    vabal.u8        q12, d6, d22

    vld1.8          {d2}, [r0], r1
    vld1.8          {d18}, [r2], r3

    vabal.u8        q12, d0, d16

    vld1.8          {d4}, [r0], r1
    vld1.8          {d20}, [r2], r3

    vabal.u8        q12, d2, d18

    vld1.8          {d6}, [r0], r1
    vld1.8          {d22}, [r2], r3

    vabal.u8        q12, d4, d20

    ; rows 8-11
    vld1.8          {d0}, [r0], r1
    vld1.8          {d16}, [r2], r3

    vabal.u8        q12, d6, d22

    vld1.8          {d2}, [r0], r1
    vld1.8          {d18}, [r2], r3

    vabal.u8        q12, d0, d16

    vld1.8          {d4}, [r0], r1
    vld1.8          {d20}, [r2], r3

    vabal.u8        q12, d2, d18

    vld1.8          {d6}, [r0], r1
    vld1.8          {d22}, [r2], r3

    vabal.u8        q12, d4, d20

    ; rows 12-15
    vld1.8          {d0}, [r0], r1
    vld1.8          {d16}, [r2], r3

    vabal.u8        q12, d6, d22

    vld1.8          {d2}, [r0], r1
    vld1.8          {d18}, [r2], r3

    vabal.u8        q12, d0, d16

    vld1.8          {d4}, [r0], r1
    vld1.8          {d20}, [r2], r3

    vabal.u8        q12, d2, d18

    vld1.8          {d6}, [r0], r1
    vld1.8          {d22}, [r2], r3

    vabal.u8        q12, d4, d20
    vabal.u8        q12, d6, d22

    ; horizontal reduction of the eight u16 column sums
    vpaddl.u16      q1, q12                 ; 8 x u16 -> 4 x u32
    vpaddl.u32      q0, q1                  ; 4 x u32 -> 2 x u64
    vadd.u32        d0, d0, d1              ; low word of d0 = total

    vmov.32         r0, d0[0]               ; return value

    bx              lr

    ENDP
;===========================
;-----------------------------------------------------------------------
; unsigned int vp8_sad4x4_neon(
;     unsigned char *src_ptr,
;     int  src_stride,
;     unsigned char *ref_ptr,
;     int  ref_stride)
;
; Sum of absolute differences over a 4-column by 4-row block of bytes.
;
; In:    r0 = src_ptr, r1 = src_stride, r2 = ref_ptr, r3 = ref_stride
; Out:   r0 = SAD
; Clobb: r0, r2 (post-incremented row pointers), d0-d7, d16, d18, d20,
;        d22, q12
;
; Only caller-saved NEON registers are used. d8-d15 are callee-saved
; under the AAPCS, so the reference rows are kept in d16-d22 and no
; save/restore sequence is needed.
;
; NOTE: every row is loaded as a full 8-byte D register, so 8 bytes per
; row must be readable in both buffers. Columns 4-7 land in d25 (the
; high half of q12), which the final reduction never reads.
;-----------------------------------------------------------------------
|vp8_sad4x4_neon| PROC
    vld1.8          {d0}, [r0], r1          ; src row 0
    vld1.8          {d16}, [r2], r3         ; ref row 0

    vld1.8          {d2}, [r0], r1          ; src row 1
    vld1.8          {d18}, [r2], r3         ; ref row 1

    vabdl.u8        q12, d0, d16            ; q12 = |src - ref|, widened to 8 x u16

    vld1.8          {d4}, [r0], r1          ; src row 2
    vld1.8          {d20}, [r2], r3         ; ref row 2

    vabal.u8        q12, d2, d18            ; q12 += |src - ref|

    vld1.8          {d6}, [r0], r1          ; src row 3
    vld1.8          {d22}, [r2], r3         ; ref row 3

    vabal.u8        q12, d4, d20
    vabal.u8        q12, d6, d22

    ; reduce only d24, i.e. the sums for columns 0-3
    vpaddl.u16      d1, d24                 ; 4 x u16 -> 2 x u32
    vpaddl.u32      d0, d1                  ; 2 x u32 -> 1 x u64
    vmov.32         r0, d0[0]               ; return value

    bx              lr

    ENDP