Removed unused vp8_recon_intra4x4mb function
[libvpx.git] / vp8 / encoder / x86 / sad_mmx.asm
blob85cb023a48771c3165d924497d8e553a96f6186f
2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
4 ; Use of this source code is governed by a BSD-style license
5 ; that can be found in the LICENSE file in the root of the source
6 ; tree. An additional intellectual property rights grant can be found
7 ; in the file PATENTS. All contributing project authors may
8 ; be found in the AUTHORS file in the root of the source tree.
12 %include "vpx_ports/x86_abi_support.asm"
; Exported entry points: one MMX sum-of-absolute-differences routine per
; block size (width x height), listed from smallest to largest block.
global sym(vp8_sad4x4_mmx)
global sym(vp8_sad8x8_mmx)
global sym(vp8_sad8x16_mmx)
global sym(vp8_sad16x8_mmx)
global sym(vp8_sad16x16_mmx)
;-----------------------------------------------------------------------
;unsigned int vp8_sad16x16_mmx(
;    unsigned char *src_ptr,
;    int  src_stride,
;    unsigned char *ref_ptr,
;    int  ref_stride)
;
; Sum of absolute differences between the 16x16 byte block at src_ptr
; and the 16x16 byte block at ref_ptr. Each block is walked one row at
; a time using its own stride.
;
; Arguments are fetched with arg(n) after SHADOW_ARGS_TO_STACK; both
; macros (and UNSHADOW_ARGS) come from the included
; x86_abi_support.asm, which is not visible in this file.
;
; Register use:
;   rsi = current source row         rdi = current reference row
;   rax = src_stride (sign-extended), later the return value
;   rdx = ref_stride (sign-extended)
;   rcx = source address 16 rows past the start (loop end marker)
;   mm6 = all zero                   mm7 = four 16-bit running sums
;   mm0-mm5 = scratch
;
; Returns: the SAD in the low 32 bits of rax. movq copies all of mm7,
;          so any bits above the low dword hold a leftover partial sum
;          and are not part of the result.
; No emms is executed here.
; NOTE(review): presumably the caller clears MMX state - confirm.
;-----------------------------------------------------------------------
sym(vp8_sad16x16_mmx):
    push        rbp
    mov         rbp,        rsp
    SHADOW_ARGS_TO_STACK 4
    push        rsi
    push        rdi
    ; end prolog

    mov         rsi,        arg(0)                  ;src_ptr
    mov         rdi,        arg(2)                  ;ref_ptr

    movsxd      rax,        dword ptr arg(1)        ;src_stride
    movsxd      rdx,        dword ptr arg(3)        ;ref_stride

    ; rcx = src_ptr + 16 * src_stride (scale 8 applied twice)
    lea         rcx,        [rsi+rax*8]
    lea         rcx,        [rcx+rax*8]

    pxor        mm7,        mm7                     ; running sums = 0
    pxor        mm6,        mm6                     ; zero for unpacking

.next_row:
    ; load 16 source bytes and 16 reference bytes of this row
    movq        mm0,        QWORD PTR [rsi]
    movq        mm2,        QWORD PTR [rsi+8]

    movq        mm1,        QWORD PTR [rdi]
    movq        mm3,        QWORD PTR [rdi+8]

    movq        mm4,        mm0                     ; keep source copies
    movq        mm5,        mm2

    ; |src - ref| per byte: saturating subtract in both directions,
    ; one of the two results is zero, so OR yields the magnitude
    psubusb     mm0,        mm1
    psubusb     mm1,        mm4

    psubusb     mm2,        mm3
    psubusb     mm3,        mm5

    por         mm0,        mm1
    por         mm2,        mm3

    movq        mm1,        mm0
    movq        mm3,        mm2

    ; widen bytes to words so the sums cannot wrap at 8 bits
    punpcklbw   mm0,        mm6
    punpcklbw   mm2,        mm6

    punpckhbw   mm1,        mm6
    punpckhbw   mm3,        mm6

    paddw       mm0,        mm2
    paddw       mm1,        mm3

    lea         rsi,        [rsi+rax]               ; next source row
    add         rdi,        rdx                     ; next reference row

    paddw       mm7,        mm0
    paddw       mm7,        mm1

    cmp         rsi,        rcx                     ; all 16 rows done?
    jne         .next_row

    ; horizontal add of the four word sums in mm7
    movq        mm0,        mm7

    punpcklwd   mm0,        mm6                     ; words 0,1 -> dwords
    punpckhwd   mm7,        mm6                     ; words 2,3 -> dwords

    paddw       mm0,        mm7
    movq        mm7,        mm0

    psrlq       mm0,        32                      ; bring high dword down
    paddw       mm7,        mm0                     ; low dword = total

    movq        rax,        mm7

    pop         rdi
    pop         rsi
    mov         rsp,        rbp
    ; begin epilog
    UNSHADOW_ARGS
    pop         rbp
    ret                                             ; return to the caller instead of running on into the code that follows
;-----------------------------------------------------------------------
;unsigned int vp8_sad8x16_mmx(
;    unsigned char *src_ptr,
;    int  src_stride,
;    unsigned char *ref_ptr,
;    int  ref_stride)
;
; Sum of absolute differences between a block 8 bytes wide and 16 rows
; tall at src_ptr and the same-sized block at ref_ptr. Each block is
; walked one row at a time using its own stride.
;
; Arguments are fetched with arg(n) after SHADOW_ARGS_TO_STACK; both
; macros (and UNSHADOW_ARGS) come from the included
; x86_abi_support.asm, which is not visible in this file.
;
; Register use:
;   rsi = current source row         rdi = current reference row
;   rax = src_stride (sign-extended), later the return value
;   rdx = ref_stride (sign-extended)
;   rcx = source address 16 rows past the start (loop end marker)
;   mm6 = all zero                   mm7 = four 16-bit running sums
;   mm0-mm2 = scratch
;
; Returns: the SAD in the low 32 bits of rax. movq copies all of mm7,
;          so any bits above the low dword hold a leftover partial sum
;          and are not part of the result.
; No emms is executed here.
; NOTE(review): presumably the caller clears MMX state - confirm.
;-----------------------------------------------------------------------
sym(vp8_sad8x16_mmx):
    push        rbp
    mov         rbp,        rsp
    SHADOW_ARGS_TO_STACK 4
    push        rsi
    push        rdi
    ; end prolog

    mov         rsi,        arg(0)                  ;src_ptr
    mov         rdi,        arg(2)                  ;ref_ptr

    movsxd      rax,        dword ptr arg(1)        ;src_stride
    movsxd      rdx,        dword ptr arg(3)        ;ref_stride

    ; rcx = src_ptr + 16 * src_stride (scale 8 applied twice)
    lea         rcx,        [rsi+rax*8]
    lea         rcx,        [rcx+rax*8]

    pxor        mm7,        mm7                     ; running sums = 0
    pxor        mm6,        mm6                     ; zero for unpacking

.next_row:
    movq        mm0,        QWORD PTR [rsi]         ; 8 source bytes
    movq        mm1,        QWORD PTR [rdi]         ; 8 reference bytes

    ; |src - ref| per byte: saturating subtract in both directions,
    ; one of the two results is zero, so OR yields the magnitude
    movq        mm2,        mm0
    psubusb     mm0,        mm1

    psubusb     mm1,        mm2
    por         mm0,        mm1

    ; widen bytes to words so the sums cannot wrap at 8 bits
    movq        mm2,        mm0
    punpcklbw   mm0,        mm6

    punpckhbw   mm2,        mm6
    lea         rsi,        [rsi+rax]               ; next source row

    add         rdi,        rdx                     ; next reference row
    paddw       mm7,        mm0

    paddw       mm7,        mm2
    cmp         rsi,        rcx                     ; all 16 rows done?

    jne         .next_row

    ; horizontal add of the four word sums in mm7
    movq        mm0,        mm7
    punpcklwd   mm0,        mm6                     ; words 0,1 -> dwords

    punpckhwd   mm7,        mm6                     ; words 2,3 -> dwords
    paddw       mm0,        mm7

    movq        mm7,        mm0
    psrlq       mm0,        32                      ; bring high dword down

    paddw       mm7,        mm0                     ; low dword = total
    movq        rax,        mm7

    pop         rdi
    pop         rsi
    mov         rsp,        rbp
    ; begin epilog
    UNSHADOW_ARGS
    pop         rbp
    ret                                             ; return to the caller instead of running on into the code that follows
;-----------------------------------------------------------------------
;unsigned int vp8_sad8x8_mmx(
;    unsigned char *src_ptr,
;    int  src_stride,
;    unsigned char *ref_ptr,
;    int  ref_stride)
;
; Sum of absolute differences between the 8x8 byte block at src_ptr and
; the 8x8 byte block at ref_ptr. Each block is walked one row at a time
; using its own stride.
;
; Arguments are fetched with arg(n) after SHADOW_ARGS_TO_STACK; both
; macros (and UNSHADOW_ARGS) come from the included
; x86_abi_support.asm, which is not visible in this file.
;
; Register use:
;   rsi = current source row         rdi = current reference row
;   rax = src_stride (sign-extended), later the return value
;   rdx = ref_stride (sign-extended)
;   rcx = source address 8 rows past the start (loop end marker)
;   mm6 = all zero                   mm7 = four 16-bit running sums
;   mm0-mm2 = scratch
;
; Returns: the SAD in the low 32 bits of rax. movq copies all of mm7,
;          so any bits above the low dword hold a leftover partial sum
;          and are not part of the result.
; No emms is executed here.
; NOTE(review): presumably the caller clears MMX state - confirm.
;-----------------------------------------------------------------------
sym(vp8_sad8x8_mmx):
    push        rbp
    mov         rbp,        rsp
    SHADOW_ARGS_TO_STACK 4
    push        rsi
    push        rdi
    ; end prolog

    mov         rsi,        arg(0)                  ;src_ptr
    mov         rdi,        arg(2)                  ;ref_ptr

    movsxd      rax,        dword ptr arg(1)        ;src_stride
    movsxd      rdx,        dword ptr arg(3)        ;ref_stride

    lea         rcx,        [rsi+rax*8]             ; src_ptr + 8 * src_stride
    pxor        mm7,        mm7                     ; running sums = 0

    pxor        mm6,        mm6                     ; zero for unpacking

.next_row:
    movq        mm0,        QWORD PTR [rsi]         ; 8 source bytes
    movq        mm1,        QWORD PTR [rdi]         ; 8 reference bytes

    ; |src - ref| per byte: saturating subtract in both directions,
    ; one of the two results is zero, so OR yields the magnitude
    movq        mm2,        mm0
    psubusb     mm0,        mm1

    psubusb     mm1,        mm2
    por         mm0,        mm1

    ; widen bytes to words so the sums cannot wrap at 8 bits
    movq        mm2,        mm0
    punpcklbw   mm0,        mm6

    punpckhbw   mm2,        mm6
    paddw       mm0,        mm2                     ; fold row into 4 words

    lea         rsi,        [rsi+rax]               ; next source row
    add         rdi,        rdx                     ; next reference row

    paddw       mm7,        mm0
    cmp         rsi,        rcx                     ; all 8 rows done?

    jne         .next_row

    ; horizontal add of the four word sums in mm7
    movq        mm0,        mm7
    punpcklwd   mm0,        mm6                     ; words 0,1 -> dwords

    punpckhwd   mm7,        mm6                     ; words 2,3 -> dwords
    paddw       mm0,        mm7

    movq        mm7,        mm0
    psrlq       mm0,        32                      ; bring high dword down

    paddw       mm7,        mm0                     ; low dword = total
    movq        rax,        mm7

    pop         rdi
    pop         rsi
    mov         rsp,        rbp
    ; begin epilog
    UNSHADOW_ARGS
    pop         rbp
    ret                                             ; return to the caller instead of running on into the code that follows
;-----------------------------------------------------------------------
;unsigned int vp8_sad4x4_mmx(
;    unsigned char *src_ptr,
;    int  src_stride,
;    unsigned char *ref_ptr,
;    int  ref_stride)
;
; Sum of absolute differences between the 4x4 byte block at src_ptr and
; the 4x4 byte block at ref_ptr. Straight-line code: rows are handled
; two at a time, with the 4 bytes of each row pair interleaved into one
; 8-byte MMX register.
;
; Arguments are fetched with arg(n) after SHADOW_ARGS_TO_STACK; both
; macros (and UNSHADOW_ARGS) come from the included
; x86_abi_support.asm, which is not visible in this file.
;
; Register use:
;   rsi = source row pointer         rdi = reference row pointer
;   rax = src_stride (sign-extended), later the return value
;   rdx = ref_stride (sign-extended)
;   mm3 = all zero (from the first pxor onward)
;   mm0 = word sums                  mm1, mm2, mm4-mm7 = scratch
;
; Returns: the SAD in the low 32 bits of rax. movq copies all of mm0,
;          so any bits above the low dword hold a leftover partial sum
;          and are not part of the result.
; No emms is executed here.
; NOTE(review): presumably the caller clears MMX state - confirm.
;-----------------------------------------------------------------------
sym(vp8_sad4x4_mmx):
    push        rbp
    mov         rbp,        rsp
    SHADOW_ARGS_TO_STACK 4
    push        rsi
    push        rdi
    ; end prolog

    mov         rsi,        arg(0)                  ;src_ptr
    mov         rdi,        arg(2)                  ;ref_ptr

    movsxd      rax,        dword ptr arg(1)        ;src_stride
    movsxd      rdx,        dword ptr arg(3)        ;ref_stride

    ; ---- rows 0 and 1 ----
    movd        mm0,        DWORD PTR [rsi]         ; source row 0
    movd        mm1,        DWORD PTR [rdi]         ; reference row 0

    movd        mm2,        DWORD PTR [rsi+rax]     ; source row 1
    movd        mm3,        DWORD PTR [rdi+rdx]     ; reference row 1

    ; interleave the two rows; source and reference use the same byte
    ; order, so matching pixels stay in matching byte lanes
    punpcklbw   mm0,        mm2
    punpcklbw   mm1,        mm3

    ; |src - ref| per byte: saturating subtract in both directions,
    ; one of the two results is zero, so OR yields the magnitude
    movq        mm2,        mm0
    psubusb     mm0,        mm1

    psubusb     mm1,        mm2
    por         mm0,        mm1

    movq        mm2,        mm0
    pxor        mm3,        mm3                     ; zero for unpacking

    ; widen bytes to words so the sums cannot wrap at 8 bits
    punpcklbw   mm0,        mm3
    punpckhbw   mm2,        mm3

    paddw       mm0,        mm2                     ; sums for rows 0-1

    ; ---- rows 2 and 3 ----
    lea         rsi,        [rsi+rax*2]
    lea         rdi,        [rdi+rdx*2]

    movd        mm4,        DWORD PTR [rsi]         ; source row 2
    movd        mm5,        DWORD PTR [rdi]         ; reference row 2

    movd        mm6,        DWORD PTR [rsi+rax]     ; source row 3
    movd        mm7,        DWORD PTR [rdi+rdx]     ; reference row 3

    punpcklbw   mm4,        mm6
    punpcklbw   mm5,        mm7

    movq        mm6,        mm4
    psubusb     mm4,        mm5

    psubusb     mm5,        mm6
    por         mm4,        mm5

    movq        mm5,        mm4
    punpcklbw   mm4,        mm3

    punpckhbw   mm5,        mm3
    paddw       mm4,        mm5                     ; sums for rows 2-3

    paddw       mm0,        mm4                     ; sums for all 4 rows
    movq        mm1,        mm0

    ; horizontal add of the four word sums
    punpcklwd   mm0,        mm3                     ; words 0,1 -> dwords
    punpckhwd   mm1,        mm3                     ; words 2,3 -> dwords

    paddw       mm0,        mm1
    movq        mm1,        mm0

    psrlq       mm0,        32                      ; bring high dword down
    paddw       mm0,        mm1                     ; low dword = total

    movq        rax,        mm0

    pop         rdi
    pop         rsi
    mov         rsp,        rbp
    ; begin epilog
    UNSHADOW_ARGS
    pop         rbp
    ret                                             ; return to the caller instead of running on into the code that follows
;-----------------------------------------------------------------------
;unsigned int vp8_sad16x8_mmx(
;    unsigned char *src_ptr,
;    int  src_stride,
;    unsigned char *ref_ptr,
;    int  ref_stride)
;
; Sum of absolute differences between a block 16 bytes wide and 8 rows
; tall at src_ptr and the same-sized block at ref_ptr. Each block is
; walked one row at a time using its own stride.
;
; Arguments are fetched with arg(n) after SHADOW_ARGS_TO_STACK; both
; macros (and UNSHADOW_ARGS) come from the included
; x86_abi_support.asm, which is not visible in this file.
;
; Register use:
;   rsi = current source row         rdi = current reference row
;   rax = src_stride (sign-extended), later the return value
;   rdx = ref_stride (sign-extended)
;   rcx = source address 8 rows past the start (loop end marker)
;   mm6 = all zero                   mm7 = four 16-bit running sums
;   mm0-mm5 = scratch
;
; Returns: the SAD in the low 32 bits of rax. movq copies all of mm7,
;          so any bits above the low dword hold a leftover partial sum
;          and are not part of the result.
; No emms is executed here.
; NOTE(review): presumably the caller clears MMX state - confirm.
;-----------------------------------------------------------------------
sym(vp8_sad16x8_mmx):
    push        rbp
    mov         rbp,        rsp
    SHADOW_ARGS_TO_STACK 4
    push        rsi
    push        rdi
    ; end prolog

    mov         rsi,        arg(0)                  ;src_ptr
    mov         rdi,        arg(2)                  ;ref_ptr

    movsxd      rax,        dword ptr arg(1)        ;src_stride
    movsxd      rdx,        dword ptr arg(3)        ;ref_stride

    lea         rcx,        [rsi+rax*8]             ; src_ptr + 8 * src_stride
    pxor        mm7,        mm7                     ; running sums = 0

    pxor        mm6,        mm6                     ; zero for unpacking

.next_row:
    ; load 16 source bytes and 16 reference bytes of this row
    movq        mm0,        [rsi]
    movq        mm1,        [rdi]

    movq        mm2,        [rsi+8]
    movq        mm3,        [rdi+8]

    movq        mm4,        mm0                     ; keep source copies
    movq        mm5,        mm2

    ; |src - ref| per byte: saturating subtract in both directions,
    ; one of the two results is zero, so OR yields the magnitude
    psubusb     mm0,        mm1
    psubusb     mm1,        mm4

    psubusb     mm2,        mm3
    psubusb     mm3,        mm5

    por         mm0,        mm1
    por         mm2,        mm3

    movq        mm1,        mm0
    movq        mm3,        mm2

    ; widen bytes to words so the sums cannot wrap at 8 bits
    punpcklbw   mm0,        mm6
    punpckhbw   mm1,        mm6

    punpcklbw   mm2,        mm6
    punpckhbw   mm3,        mm6

    paddw       mm0,        mm2
    paddw       mm1,        mm3

    paddw       mm0,        mm1                     ; fold row into 4 words
    lea         rsi,        [rsi+rax]               ; next source row

    add         rdi,        rdx                     ; next reference row
    paddw       mm7,        mm0

    cmp         rsi,        rcx                     ; all 8 rows done?
    jne         .next_row

    ; horizontal add of the four word sums in mm7
    movq        mm0,        mm7
    punpcklwd   mm0,        mm6                     ; words 0,1 -> dwords

    punpckhwd   mm7,        mm6                     ; words 2,3 -> dwords
    paddw       mm0,        mm7

    movq        mm7,        mm0
    psrlq       mm0,        32                      ; bring high dword down

    paddw       mm7,        mm0                     ; low dword = total
    movq        rax,        mm7

    pop         rdi
    pop         rsi
    mov         rsp,        rbp
    ; begin epilog
    UNSHADOW_ARGS
    pop         rbp
    ret                                             ; return to the caller instead of running on past the end of the routine