2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
4 ; Use of this source code is governed by a BSD-style license
5 ; that can be found in the LICENSE file in the root of the source
6 ; tree. An additional intellectual property rights grant can be found
7 ; in the file PATENTS. All contributing project authors may
8 ; be found in the AUTHORS file in the root of the source tree.
12 %include "vpx_ports/x86_abi_support.asm"
14 ;unsigned int vp8_sad16x16_wmt(
15 ; unsigned char *src_ptr,
17 ; unsigned char *ref_ptr,
; NOTE(review): the prototype comment above is truncated in this excerpt. The
; body below reads arg(1) as src_stride and arg(3) as ref_stride, so the full
; parameter order appears to be (src_ptr, src_stride, ref_ptr, ref_stride).
;
; Purpose: presumably a 16x16 sum-of-absolute-differences kernel, judging by
; the name -- the arithmetic itself is not visible in this excerpt. What IS
; visible: argument setup, then 8-byte loads of two consecutive 16-byte-wide
; rows from the source and reference buffers, then a backward branch to a loop
; label that is defined outside the visible lines.
;
; Register roles established by the visible code:
;   rsi = src_ptr            rax = src_stride (sign-extended from 32 bits)
;   rdi = ref_ptr            rdx = ref_stride (sign-extended from 32 bits)
19 global sym
(vp8_sad16x16_wmt
)
20 sym
(vp8_sad16x16_wmt
):
; SHADOW_ARGS_TO_STACK and arg() come from the included x86_abi_support.asm;
; presumably this makes the 4 arguments addressable as arg(0)..arg(3) -- verify
; against that file.
23 SHADOW_ARGS_TO_STACK
4
28 mov rsi
, arg
(0) ;src_ptr
29 mov rdi
, arg
(2) ;ref_ptr
; Strides are passed as 32-bit ints; sign-extend so they can be used directly
; in 64-bit address arithmetic.
31 movsxd rax
, dword ptr arg
(1) ;src_stride
32 movsxd rdx
, dword ptr arg
(3) ;ref_stride
; Current row: source bytes 0-7 -> xmm0, source bytes 8-15 -> xmm2.
41 movq xmm0
, QWORD PTR [rsi
]
42 movq xmm2
, QWORD PTR [rsi
+8]
; Current row: reference bytes 0-7 -> xmm1, reference bytes 8-15 -> xmm3.
44 movq xmm1
, QWORD PTR [rdi
]
45 movq xmm3
, QWORD PTR [rdi
+8]
; Next row (pointer + stride): source bytes 0-7 -> xmm4, reference bytes 0-7 -> xmm5.
47 movq xmm4
, QWORD PTR [rsi
+rax
]
48 movq xmm5
, QWORD PTR [rdi
+rdx
]
; Next row: source bytes 8-15 -> xmm6.
55 movq xmm6
, QWORD PTR [rsi
+rax
+8]
; Next row: reference bytes 8-15 -> xmm3. This overwrites the value loaded
; into xmm3 earlier, so that value must already have been consumed by
; instructions that are not visible in this excerpt.
57 movq xmm3
, QWORD PTR [rdi
+rdx
+8]
; Loop back while the (not visible) preceding comparison left ZF clear. The
; target label x16x16sad_wmt_loop is not present in the visible lines.
70 jne x16x16sad_wmt_loop
85 ;unsigned int vp8_sad8x16_wmt(
86 ; unsigned char *src_ptr,
88 ; unsigned char *ref_ptr,
; NOTE(review): the prototype comment above is truncated in this excerpt. The
; body reads arg(1) as src_stride and arg(3) as ref_stride, and
; SHADOW_ARGS_TO_STACK is given 5, so a fifth argument exists; its use is not
; visible here.
;
; Purpose: presumably an 8-wide, 16-row sum-of-absolute-differences kernel with
; an early-exit path, judging by the name and labels -- the arithmetic and the
; comparison feeding the early exit are not visible in this excerpt.
;
; Register roles established by the visible code:
;   rsi = src_ptr            rbx = src_stride (sign-extended from 32 bits)
;   rdi = ref_ptr            rdx = ref_stride (sign-extended from 32 bits)
;
; NOTE(review): rbx is callee-saved in both the System V and Microsoft x64
; ABIs; its save/restore is not visible in this excerpt -- confirm it exists.
; NOTE(review): the sym(vp8_sad8x16_wmt): entry label is not visible in this
; excerpt either.
91 global sym
(vp8_sad8x16_wmt
)
95 SHADOW_ARGS_TO_STACK
5
101 mov rsi
, arg
(0) ;src_ptr
102 mov rdi
, arg
(2) ;ref_ptr
; Strides are passed as 32-bit ints; sign-extend for 64-bit addressing.
104 movsxd rbx
, dword ptr arg
(1) ;src_stride
105 movsxd rdx
, dword ptr arg
(3) ;ref_stride
; Leave the loop early when the (not visible) preceding signed comparison
; found its first operand greater than its second.
116 jg x8x16sad_wmt_early_exit
; Current row: 8 source bytes -> mm0, 8 reference bytes -> mm1.
118 movq mm0
, QWORD PTR [rsi
]
119 movq mm1
, QWORD PTR [rdi
]
; Next row (pointer + stride): 8 source bytes -> mm2, 8 reference bytes -> mm3.
121 movq mm2
, QWORD PTR [rsi
+rbx
]
122 movq mm3
, QWORD PTR [rdi
+rdx
]
; Loop back while the (not visible) preceding comparison left ZF clear. The
; target label x8x16sad_wmt_loop is not present in the visible lines.
134 jne x8x16sad_wmt_loop
; Target of the early-exit branch above; the code that follows it is not
; visible in this excerpt.
138 x8x16sad_wmt_early_exit:
149 ;unsigned int vp8_sad8x8_wmt(
150 ; unsigned char *src_ptr,
152 ; unsigned char *ref_ptr,
; NOTE(review): the prototype comment above is truncated in this excerpt. The
; body reads arg(1) as src_stride and arg(3) as ref_stride, and
; SHADOW_ARGS_TO_STACK is given 5, so a fifth argument exists; its use is not
; visible here.
;
; Purpose: presumably an 8x8 sum-of-absolute-differences kernel with an
; early-exit path, judging by the name and labels -- the arithmetic, the loop
; label and the loop-back branch are not visible in this excerpt.
;
; Register roles established by the visible code:
;   rsi = src_ptr            rbx = src_stride (sign-extended from 32 bits)
;   rdi = ref_ptr            rdx = ref_stride (sign-extended from 32 bits)
;
; NOTE(review): rbx is callee-saved in both the System V and Microsoft x64
; ABIs; its save/restore is not visible in this excerpt -- confirm it exists.
; NOTE(review): the sym(vp8_sad8x8_wmt): entry label is not visible in this
; excerpt either.
154 global sym
(vp8_sad8x8_wmt
)
158 SHADOW_ARGS_TO_STACK
5
164 mov rsi
, arg
(0) ;src_ptr
165 mov rdi
, arg
(2) ;ref_ptr
; Strides are passed as 32-bit ints; sign-extend for 64-bit addressing.
167 movsxd rbx
, dword ptr arg
(1) ;src_stride
168 movsxd rdx
, dword ptr arg
(3) ;ref_stride
; Leave early when the (not visible) preceding signed comparison found its
; first operand greater than its second.
177 jg x8x8sad_wmt_early_exit
; Current row: 8 source bytes -> mm0, 8 reference bytes -> mm1.
179 movq mm0
, QWORD PTR [rsi
]
180 movq mm1
, QWORD PTR [rdi
]
; Target of the early-exit branch above; the code that follows it is not
; visible in this excerpt.
192 x8x8sad_wmt_early_exit:
202 ;unsigned int vp8_sad4x4_wmt(
203 ; unsigned char *src_ptr,
205 ; unsigned char *ref_ptr,
; NOTE(review): the prototype comment above is truncated in this excerpt. The
; body reads arg(1) as src_stride and arg(3) as ref_stride.
;
; Purpose: presumably a 4x4 sum-of-absolute-differences kernel, judging by the
; name -- the arithmetic is not visible in this excerpt. The visible code is
; straight-line (no branches): two groups of 4-byte loads, each group reading
; one row and the row one stride later from both buffers.
;
; Register roles established by the visible code:
;   rsi = src_ptr            rax = src_stride (sign-extended from 32 bits)
;   rdi = ref_ptr            rdx = ref_stride (sign-extended from 32 bits)
;
; NOTE(review): the sym(vp8_sad4x4_wmt): entry label is not visible in this
; excerpt.
207 global sym
(vp8_sad4x4_wmt
)
211 SHADOW_ARGS_TO_STACK
4
216 mov rsi
, arg
(0) ;src_ptr
217 mov rdi
, arg
(2) ;ref_ptr
; Strides are passed as 32-bit ints; sign-extend for 64-bit addressing.
219 movsxd rax
, dword ptr arg
(1) ;src_stride
220 movsxd rdx
, dword ptr arg
(3) ;ref_stride
; First group, row at pointer: 4 source bytes -> mm0, 4 reference bytes -> mm1.
222 movd mm0
, DWORD PTR [rsi
]
223 movd mm1
, DWORD PTR [rdi
]
; First group, row at pointer + stride: source -> mm2, reference -> mm3.
225 movd mm2
, DWORD PTR [rsi
+rax
]
226 movd mm3
, DWORD PTR [rdi
+rdx
]
; Second group uses the same addressing forms as the first; presumably rsi and
; rdi were advanced by instructions not visible in this excerpt -- otherwise
; these loads would re-read the same rows. TODO confirm.
; Second group, row at pointer: source -> mm4, reference -> mm5.
235 movd mm4
, DWORD PTR [rsi
]
237 movd mm5
, DWORD PTR [rdi
]
; Second group, row at pointer + stride: source -> mm6, reference -> mm7.
238 movd mm6
, DWORD PTR [rsi
+rax
]
240 movd mm7
, DWORD PTR [rdi
+rdx
]
257 ;unsigned int vp8_sad16x8_wmt(
258 ; unsigned char *src_ptr,
260 ; unsigned char *ref_ptr,
; NOTE(review): the prototype comment above is truncated in this excerpt. The
; body reads arg(1) as src_stride and arg(3) as ref_stride, and
; SHADOW_ARGS_TO_STACK is given 5, so a fifth argument exists; its use is not
; visible here.
;
; Purpose: presumably a 16-wide, 8-row sum-of-absolute-differences kernel with
; an early-exit path, judging by the name and labels -- the arithmetic and the
; comparison feeding the early exit are not visible in this excerpt. The
; visible loads read two consecutive 16-byte-wide rows per pass, 8 bytes at a
; time, into MMX registers.
;
; Register roles established by the visible code:
;   rsi = src_ptr            rbx = src_stride (sign-extended from 32 bits)
;   rdi = ref_ptr            rdx = ref_stride (sign-extended from 32 bits)
;
; NOTE(review): rbx is callee-saved in both the System V and Microsoft x64
; ABIs; its save/restore is not visible in this excerpt -- confirm it exists.
262 global sym
(vp8_sad16x8_wmt
)
263 sym
(vp8_sad16x8_wmt
):
266 SHADOW_ARGS_TO_STACK
5
273 mov rsi
, arg
(0) ;src_ptr
274 mov rdi
, arg
(2) ;ref_ptr
; Strides are passed as 32-bit ints; sign-extend for 64-bit addressing.
276 movsxd rbx
, dword ptr arg
(1) ;src_stride
277 movsxd rdx
, dword ptr arg
(3) ;ref_stride
; Leave the loop early when the (not visible) preceding signed comparison
; found its first operand greater than its second.
286 jg x16x8sad_wmt_early_exit
; Current row: source bytes 0-7 -> mm0, source bytes 8-15 -> mm2.
288 movq mm0
, QWORD PTR [rsi
]
289 movq mm2
, QWORD PTR [rsi
+8]
; Current row: reference bytes 0-7 -> mm1, reference bytes 8-15 -> mm3.
291 movq mm1
, QWORD PTR [rdi
]
292 movq mm3
, QWORD PTR [rdi
+8]
; Next row (pointer + stride): source bytes 0-7 -> mm4, reference bytes 0-7 -> mm5.
294 movq mm4
, QWORD PTR [rsi
+rbx
]
295 movq mm5
, QWORD PTR [rdi
+rdx
]
; Next row: source bytes 8-15 -> mm1, reference bytes 8-15 -> mm3. Both
; registers are reused here, so the values loaded into them earlier must
; already have been consumed by instructions not visible in this excerpt.
300 movq mm1
, QWORD PTR [rsi
+rbx
+8]
301 movq mm3
, QWORD PTR [rdi
+rdx
+8]
; Loop back while the (not visible) preceding comparison left ZF clear. The
; target label x16x8sad_wmt_loop is not present in the visible lines.
316 jne x16x8sad_wmt_loop
; Target of the early-exit branch above; the code that follows it is not
; visible in this excerpt.
320 x16x8sad_wmt_early_exit: