; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license and patent
; grant that can be found in the LICENSE file in the root of the source
; tree. All contributing project authors may be found in the AUTHORS
; file in the root of the source tree.
%include "vpx_ports/x86_abi_support.asm"
;unsigned int vp8_sad16x16_wmt(
;    unsigned char *src_ptr,
;    int  src_stride,
;    unsigned char *ref_ptr,
;    int  ref_stride)
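;
; As a rough scalar reference, this routine is expected to compute a plain
; 16x16 sum of absolute differences, roughly equivalent to the C sketch
; below (the name sad16x16_c and the loop shape are illustrative
; assumptions, not taken from this file):
;
;    #include <stdlib.h>
;
;    unsigned int sad16x16_c(const unsigned char *src_ptr, int src_stride,
;                            const unsigned char *ref_ptr, int ref_stride)
;    {
;        unsigned int sad = 0;
;        for (int r = 0; r < 16; r++) {
;            /* accumulate |src - ref| across one 16-pixel row */
;            for (int c = 0; c < 16; c++)
;                sad += abs(src_ptr[c] - ref_ptr[c]);
;            src_ptr += src_stride;  /* step to the next source row    */
;            ref_ptr += ref_stride;  /* step to the next reference row */
;        }
;        return sad;
;    }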
global sym(vp8_sad16x16_wmt)
sym(vp8_sad16x16_wmt):
    SHADOW_ARGS_TO_STACK 4
        mov             rsi,        arg(0) ;src_ptr
        mov             rdi,        arg(2) ;ref_ptr

        movsxd          rax,        dword ptr arg(1) ;src_stride
        movsxd          rdx,        dword ptr arg(3) ;ref_stride
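
        ; the loads below fetch one 16-pixel row of source and reference as
        ; two 8-byte halves each, plus the corresponding halves of the next
        ; row at rsi+rax / rdi+rdx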
        movq            xmm0,       QWORD PTR [rsi]
        movq            xmm2,       QWORD PTR [rsi+8]

        movq            xmm1,       QWORD PTR [rdi]
        movq            xmm3,       QWORD PTR [rdi+8]

        movq            xmm4,       QWORD PTR [rsi+rax]
        movq            xmm5,       QWORD PTR [rdi+rdx]

        movq            xmm6,       QWORD PTR [rsi+rax+8]

        movq            xmm3,       QWORD PTR [rdi+rdx+8]
        jne             x16x16sad_wmt_loop
;unsigned int vp8_sad8x16_wmt(
;    unsigned char *src_ptr,
;    int  src_stride,
;    unsigned char *ref_ptr,
;    int  ref_stride,
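;
; SHADOW_ARGS_TO_STACK 5 below implies a fifth stacked argument; judging by
; the x8x16sad_wmt_early_exit branch, it is presumably a caller-supplied SAD
; threshold used for an early exit (an assumption based on the label name).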
global sym(vp8_sad8x16_wmt)
    SHADOW_ARGS_TO_STACK 5
        mov             rsi,        arg(0) ;src_ptr
        mov             rdi,        arg(2) ;ref_ptr

        movsxd          rbx,        dword ptr arg(1) ;src_stride
        movsxd          rdx,        dword ptr arg(3) ;ref_stride
        jg              x8x16sad_wmt_early_exit
        movq            mm0,        QWORD PTR [rsi]
        movq            mm1,        QWORD PTR [rdi]

        movq            mm2,        QWORD PTR [rsi+rbx]
        movq            mm3,        QWORD PTR [rdi+rdx]
        jne             x8x16sad_wmt_loop

x8x16sad_wmt_early_exit:
;unsigned int vp8_sad8x8_wmt(
;    unsigned char *src_ptr,
;    int  src_stride,
;    unsigned char *ref_ptr,
;    int  ref_stride,
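;
; 8x8 counterpart of vp8_sad8x16_wmt above: same argument layout
; (SHADOW_ARGS_TO_STACK 5), half as many rows.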
global sym(vp8_sad8x8_wmt)
    SHADOW_ARGS_TO_STACK 5
        mov             rsi,        arg(0) ;src_ptr
        mov             rdi,        arg(2) ;ref_ptr

        movsxd          rbx,        dword ptr arg(1) ;src_stride
        movsxd          rdx,        dword ptr arg(3) ;ref_stride
        jg              x8x8sad_wmt_early_exit
        movq            mm0,        QWORD PTR [rsi]
        movq            mm1,        QWORD PTR [rdi]
x8x8sad_wmt_early_exit:
;unsigned int vp8_sad4x4_wmt(
;    unsigned char *src_ptr,
;    int  src_stride,
;    unsigned char *ref_ptr,
;    int  ref_stride)
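;
; A 4x4 row is only 4 bytes wide, so the loads below use movd, which reads
; 32 bits into an MMX register, rather than movq.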
global sym(vp8_sad4x4_wmt)
    SHADOW_ARGS_TO_STACK 4
        mov             rsi,        arg(0) ;src_ptr
        mov             rdi,        arg(2) ;ref_ptr

        movsxd          rax,        dword ptr arg(1) ;src_stride
        movsxd          rdx,        dword ptr arg(3) ;ref_stride
        movd            mm0,        QWORD PTR [rsi]
        movd            mm1,        QWORD PTR [rdi]

        movd            mm2,        QWORD PTR [rsi+rax]
        movd            mm3,        QWORD PTR [rdi+rdx]

        movd            mm4,        QWORD PTR [rsi]
        movd            mm5,        QWORD PTR [rdi]

        movd            mm6,        QWORD PTR [rsi+rax]
        movd            mm7,        QWORD PTR [rdi+rdx]
;unsigned int vp8_sad16x8_wmt(
;    unsigned char *src_ptr,
;    int  src_stride,
;    unsigned char *ref_ptr,
;    int  ref_stride,
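;
; Reads 16-pixel rows as two 8-byte MMX loads (low and high halves), two
; rows per loop iteration; the x16x8sad_wmt_early_exit branch presumably
; fires once a caller-supplied SAD threshold is exceeded (an assumption
; based on the label name).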
global sym(vp8_sad16x8_wmt)
sym(vp8_sad16x8_wmt):
    SHADOW_ARGS_TO_STACK 5
        mov             rsi,        arg(0) ;src_ptr
        mov             rdi,        arg(2) ;ref_ptr

        movsxd          rbx,        dword ptr arg(1) ;src_stride
        movsxd          rdx,        dword ptr arg(3) ;ref_stride
        jg              x16x8sad_wmt_early_exit
        movq            mm0,        QWORD PTR [rsi]
        movq            mm2,        QWORD PTR [rsi+8]

        movq            mm1,        QWORD PTR [rdi]
        movq            mm3,        QWORD PTR [rdi+8]

        movq            mm4,        QWORD PTR [rsi+rbx]
        movq            mm5,        QWORD PTR [rdi+rdx]

        movq            mm1,        QWORD PTR [rsi+rbx+8]
        movq            mm3,        QWORD PTR [rdi+rdx+8]
        jne             x16x8sad_wmt_loop

x16x8sad_wmt_early_exit: