Removed unused vp8_recon_intra4x4mb function
[libvpx.git] / vp8 / encoder / x86 / sad_sse2.asm
blob39ed796049bc0b5c01dbdd0b99e72106f20081c3
;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;
12 %include "vpx_ports/x86_abi_support.asm"
;unsigned int vp8_sad16x16_wmt(
;    unsigned char *src_ptr,
;    int  src_stride,
;    unsigned char *ref_ptr,
;    int  ref_stride)
;
; Sum of absolute differences over a 16-byte-wide, 16-row block.
; Two rows are handled per loop iteration; the total is returned in rax.
;
; Register roles:
;   rsi = current source row         rax = src_stride (until the final result)
;   rdi = current reference row      rdx = ref_stride
;   rcx = src_ptr + 16 * src_stride  (loop end marker)
;   xmm5 = running SAD (one partial sum per 64-bit lane)
;
; Only xmm0-xmm5 are used, so no XMM register that is callee-saved under
; the Microsoft x64 convention (xmm6-xmm15) is modified.
global sym(vp8_sad16x16_wmt)
sym(vp8_sad16x16_wmt):
    push        rbp
    mov         rbp,        rsp
    SHADOW_ARGS_TO_STACK 4
    push        rsi
    push        rdi
    ; end prolog

    mov         rsi,        arg(0)                  ;src_ptr
    mov         rdi,        arg(2)                  ;ref_ptr

    movsxd      rax,        dword ptr arg(1)        ;src_stride
    movsxd      rdx,        dword ptr arg(3)        ;ref_stride

    lea         rcx,        [rsi+rax*8]
    lea         rcx,        [rcx+rax*8]             ; rcx = src_ptr + 16 rows
    pxor        xmm5,       xmm5                    ; running SAD = 0

.x16x16sad_wmt_loop:

    ; ---- first row of the pair ----
    movq        xmm0,       QWORD PTR [rsi]
    movq        xmm2,       QWORD PTR [rsi+8]

    movq        xmm1,       QWORD PTR [rdi]
    movq        xmm3,       QWORD PTR [rdi+8]

    ; Source and reference halves are interleaved the same way, so every
    ; source byte still lines up with its reference byte for psadbw.
    punpcklbw   xmm0,       xmm2
    punpcklbw   xmm1,       xmm3

    psadbw      xmm0,       xmm1

    ; ---- second row of the pair ----
    movq        xmm4,       QWORD PTR [rsi+rax]
    movq        xmm2,       QWORD PTR [rsi+rax+8]

    movq        xmm1,       QWORD PTR [rdi+rdx]
    movq        xmm3,       QWORD PTR [rdi+rdx+8]

    punpcklbw   xmm4,       xmm2
    punpcklbw   xmm1,       xmm3

    psadbw      xmm4,       xmm1

    lea         rsi,        [rsi+rax*2]             ; advance both pointers
    lea         rdi,        [rdi+rdx*2]             ; by two rows

    paddw       xmm5,       xmm0
    paddw       xmm5,       xmm4

    cmp         rsi,        rcx
    jne         .x16x16sad_wmt_loop

    ; Fold the two 64-bit lane sums into a single value.
    movq        xmm0,       xmm5
    psrldq      xmm5,       8

    paddw       xmm0,       xmm5
    movq        rax,        xmm0

    ; begin epilog
    pop         rdi
    pop         rsi
    UNSHADOW_ARGS
    pop         rbp
    ret
;unsigned int vp8_sad8x16_wmt(
;    unsigned char *src_ptr,
;    int  src_stride,
;    unsigned char *ref_ptr,
;    int  ref_stride,
;    int  max_err)
;
; Sum of absolute differences over an 8-byte-wide, 16-row block, two rows
; per loop iteration.  Before each pair of rows the running total is
; compared with max_err; once it is greater, the routine stops and returns
; the partial total accumulated so far.  The result is returned in rax.
;
; Register roles:
;   rsi = current source row         rbx = src_stride
;   rdi = current reference row      rdx = ref_stride
;   rcx = src_ptr + 16 * src_stride  (loop end marker)
;   mm7 = running SAD
;
; NOTE(review): MMX registers are used and no emms is issued here;
; presumably the caller clears the MMX state - confirm.
global sym(vp8_sad8x16_wmt)
sym(vp8_sad8x16_wmt):
    push        rbp
    mov         rbp,        rsp
    SHADOW_ARGS_TO_STACK 5
    push        rbx
    push        rsi
    push        rdi
    ; end prolog

    mov         rsi,        arg(0)                  ;src_ptr
    mov         rdi,        arg(2)                  ;ref_ptr

    movsxd      rbx,        dword ptr arg(1)        ;src_stride
    movsxd      rdx,        dword ptr arg(3)        ;ref_stride

    lea         rcx,        [rsi+rbx*8]
    lea         rcx,        [rcx+rbx*8]             ; rcx = src_ptr + 16 rows
    pxor        mm7,        mm7                     ; running SAD = 0

.x8x16sad_wmt_loop:

    ; max_err is an int, so compare only the low 32 bits: the upper half
    ; of a 64-bit argument slot is not guaranteed to be defined.
    movq        rax,        mm7
    cmp         eax,        dword ptr arg(4)        ;max_err
    jg          .x8x16sad_wmt_early_exit

    movq        mm0,        QWORD PTR [rsi]
    movq        mm1,        QWORD PTR [rdi]

    movq        mm2,        QWORD PTR [rsi+rbx]
    movq        mm3,        QWORD PTR [rdi+rdx]

    psadbw      mm0,        mm1
    psadbw      mm2,        mm3

    lea         rsi,        [rsi+rbx*2]             ; advance both pointers
    lea         rdi,        [rdi+rdx*2]             ; by two rows

    paddw       mm7,        mm0
    paddw       mm7,        mm2

    cmp         rsi,        rcx
    jne         .x8x16sad_wmt_loop

    movq        rax,        mm7                     ; full-block result

.x8x16sad_wmt_early_exit:

    ; begin epilog
    pop         rdi
    pop         rsi
    pop         rbx
    UNSHADOW_ARGS
    pop         rbp
    ret                                             ; do not fall through into the next routine
;unsigned int vp8_sad8x8_wmt(
;    unsigned char *src_ptr,
;    int  src_stride,
;    unsigned char *ref_ptr,
;    int  ref_stride,
;    int  max_err)
;
; Sum of absolute differences over an 8-byte-wide, 8-row block, one row
; per loop iteration.  Before each row the running total is compared with
; max_err (the fifth argument, read as arg(4)); once it is greater, the
; routine stops and returns the partial total accumulated so far.
; The result is returned in rax.
;
; Register roles:
;   rsi = current source row         rbx = src_stride
;   rdi = current reference row      rdx = ref_stride
;   rcx = src_ptr + 8 * src_stride   (loop end marker)
;   mm7 = running SAD
;
; NOTE(review): MMX registers are used and no emms is issued here;
; presumably the caller clears the MMX state - confirm.
global sym(vp8_sad8x8_wmt)
sym(vp8_sad8x8_wmt):
    push        rbp
    mov         rbp,        rsp
    SHADOW_ARGS_TO_STACK 5
    push        rbx
    push        rsi
    push        rdi
    ; end prolog

    mov         rsi,        arg(0)                  ;src_ptr
    mov         rdi,        arg(2)                  ;ref_ptr

    movsxd      rbx,        dword ptr arg(1)        ;src_stride
    movsxd      rdx,        dword ptr arg(3)        ;ref_stride

    lea         rcx,        [rsi+rbx*8]             ; rcx = src_ptr + 8 rows
    pxor        mm7,        mm7                     ; running SAD = 0

.x8x8sad_wmt_loop:

    ; max_err is an int, so compare only the low 32 bits: the upper half
    ; of a 64-bit argument slot is not guaranteed to be defined.
    movq        rax,        mm7
    cmp         eax,        dword ptr arg(4)        ;max_err
    jg          .x8x8sad_wmt_early_exit

    movq        mm0,        QWORD PTR [rsi]
    movq        mm1,        QWORD PTR [rdi]

    psadbw      mm0,        mm1

    add         rsi,        rbx                     ; next source row
    add         rdi,        rdx                     ; next reference row

    paddw       mm7,        mm0

    cmp         rsi,        rcx
    jne         .x8x8sad_wmt_loop

    movq        rax,        mm7                     ; full-block result

.x8x8sad_wmt_early_exit:

    ; begin epilog
    pop         rdi
    pop         rsi
    pop         rbx
    UNSHADOW_ARGS
    pop         rbp
    ret                                             ; do not fall through into the next routine
;unsigned int vp8_sad4x4_wmt(
;    unsigned char *src_ptr,
;    int  src_stride,
;    unsigned char *ref_ptr,
;    int  ref_stride)
;
; Sum of absolute differences over a 4-byte-wide, 4-row block.
; Rows are processed in pairs: the two 4-byte rows of a pair are
; interleaved into one 8-byte MMX register so a single psadbw covers both.
; The result is returned in rax.
;
; Register roles:
;   rsi = current source row         rax = src_stride (until the final result)
;   rdi = current reference row      rdx = ref_stride
;
; NOTE(review): MMX registers are used and no emms is issued here;
; presumably the caller clears the MMX state - confirm.
global sym(vp8_sad4x4_wmt)
sym(vp8_sad4x4_wmt):
    push        rbp
    mov         rbp,        rsp
    SHADOW_ARGS_TO_STACK 4
    push        rsi
    push        rdi
    ; end prolog

    mov         rsi,        arg(0)                  ;src_ptr
    mov         rdi,        arg(2)                  ;ref_ptr

    movsxd      rax,        dword ptr arg(1)        ;src_stride
    movsxd      rdx,        dword ptr arg(3)        ;ref_stride

    ; ---- rows 0 and 1 ----
    movd        mm0,        DWORD PTR [rsi]
    movd        mm1,        DWORD PTR [rdi]

    movd        mm2,        DWORD PTR [rsi+rax]
    movd        mm3,        DWORD PTR [rdi+rdx]

    ; Source and reference rows are interleaved the same way, so every
    ; source byte still lines up with its reference byte for psadbw.
    punpcklbw   mm0,        mm2
    punpcklbw   mm1,        mm3

    psadbw      mm0,        mm1

    lea         rsi,        [rsi+rax*2]             ; step to row 2
    lea         rdi,        [rdi+rdx*2]

    ; ---- rows 2 and 3 ----
    movd        mm4,        DWORD PTR [rsi]
    movd        mm5,        DWORD PTR [rdi]

    movd        mm6,        DWORD PTR [rsi+rax]
    movd        mm7,        DWORD PTR [rdi+rdx]

    punpcklbw   mm4,        mm6
    punpcklbw   mm5,        mm7

    psadbw      mm4,        mm5

    paddw       mm0,        mm4                     ; total of both row pairs
    movq        rax,        mm0

    ; begin epilog
    pop         rdi
    pop         rsi
    UNSHADOW_ARGS
    pop         rbp
    ret                                             ; do not fall through into the next routine
;unsigned int vp8_sad16x8_wmt(
;    unsigned char *src_ptr,
;    int  src_stride,
;    unsigned char *ref_ptr,
;    int  ref_stride,
;    int  max_err)
;
; Sum of absolute differences over a 16-byte-wide, 8-row block, two rows
; per loop iteration (each row as two 8-byte halves).  Before each pair of
; rows the running total is compared with max_err (the fifth argument,
; read as arg(4)); once it is greater, the routine stops and returns the
; partial total accumulated so far.  The result is returned in rax.
;
; Register roles:
;   rsi = current source row         rbx = src_stride
;   rdi = current reference row      rdx = ref_stride
;   rcx = src_ptr + 8 * src_stride   (loop end marker)
;   mm7 = running SAD
;
; NOTE(review): MMX registers are used and no emms is issued here;
; presumably the caller clears the MMX state - confirm.
global sym(vp8_sad16x8_wmt)
sym(vp8_sad16x8_wmt):
    push        rbp
    mov         rbp,        rsp
    SHADOW_ARGS_TO_STACK 5
    push        rbx
    push        rsi
    push        rdi
    ; end prolog

    mov         rsi,        arg(0)                  ;src_ptr
    mov         rdi,        arg(2)                  ;ref_ptr

    movsxd      rbx,        dword ptr arg(1)        ;src_stride
    movsxd      rdx,        dword ptr arg(3)        ;ref_stride

    lea         rcx,        [rsi+rbx*8]             ; rcx = src_ptr + 8 rows
    pxor        mm7,        mm7                     ; running SAD = 0

.x16x8sad_wmt_loop:

    ; max_err is an int, so compare only the low 32 bits: the upper half
    ; of a 64-bit argument slot is not guaranteed to be defined.
    movq        rax,        mm7
    cmp         eax,        dword ptr arg(4)        ;max_err
    jg          .x16x8sad_wmt_early_exit

    ; ---- first row of the pair: left and right halves ----
    movq        mm0,        QWORD PTR [rsi]
    movq        mm2,        QWORD PTR [rsi+8]

    movq        mm1,        QWORD PTR [rdi]
    movq        mm3,        QWORD PTR [rdi+8]

    ; ---- second row of the pair: left half ----
    movq        mm4,        QWORD PTR [rsi+rbx]
    movq        mm5,        QWORD PTR [rdi+rdx]

    psadbw      mm0,        mm1
    psadbw      mm2,        mm3

    ; ---- second row of the pair: right half (mm1/mm3 are free again) ----
    movq        mm1,        QWORD PTR [rsi+rbx+8]
    movq        mm3,        QWORD PTR [rdi+rdx+8]

    psadbw      mm4,        mm5
    psadbw      mm1,        mm3

    lea         rsi,        [rsi+rbx*2]             ; advance both pointers
    lea         rdi,        [rdi+rdx*2]             ; by two rows

    paddw       mm0,        mm2                     ; first row total
    paddw       mm4,        mm1                     ; second row total

    paddw       mm7,        mm0
    paddw       mm7,        mm4

    cmp         rsi,        rcx
    jne         .x16x8sad_wmt_loop

    movq        rax,        mm7                     ; full-block result

.x16x8sad_wmt_early_exit:

    ; begin epilog
    pop         rdi
    pop         rsi
    pop         rbx
    UNSHADOW_ARGS
    pop         rbp
    ret                                             ; return to the caller after the epilogue