2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
4 ; Use of this source code is governed by a BSD-style license
5 ; that can be found in the LICENSE file in the root of the source
6 ; tree. An additional intellectual property rights grant can be found
7 ; in the file PATENTS. All contributing project authors may
8 ; be found in the AUTHORS file in the root of the source tree.
12 %include "vpx_ports/x86_abi_support.asm"
; PROCESS_16X2X8 <param>
; Macro taking one parameter (the "1" after the name). The visible
; instructions load a 16-byte source row through rsi and 24 bytes of the
; reference row through rdi (three 8-byte loads at +0, +8, +16), then run
; MPSADBW on them.
;
; MPSADBW immediates used here (per the instruction definition):
;   0x0 -> source dword 0, destination byte offset 0
;   0x5 -> source dword 1, destination byte offset 4
;
; NOTE(review): this listing is incomplete. The number fused to the start
; of each instruction is its line number in the original file, and those
; numbers skip (19 -> 24, 25 -> 30, 31 -> 37, ...). No %if/%else/%endmacro,
; no register copies and no accumulation are visible, and xmm2/xmm4 are
; used as MPSADBW destinations without a visible initialisation. Recover
; the missing lines from the original file before assembling.
14 %macro PROCESS_16X2X8
1
; --- Group 1: row at [rsi] / [rdi]; first result goes to xmm1 ---
; movdqa faults unless [rsi] is 16-byte aligned.
16 movdqa xmm0
, XMMWORD
PTR [rsi
]
; Reference bytes 0-7, 8-15 and 16-23 of the row.
17 movq xmm1
, MMWORD
PTR [rdi
]
18 movq xmm3
, MMWORD
PTR [rdi
+8]
19 movq xmm2
, MMWORD
PTR [rdi
+16]
; NOTE(review): original lines 20-23 are not visible here.
24 mpsadbw xmm1
, xmm0
, 0x0
25 mpsadbw xmm2
, xmm0
, 0x5
; NOTE(review): original lines 26-29 are not visible here.
30 mpsadbw xmm3
, xmm0
, 0x0
31 mpsadbw xmm4
, xmm0
, 0x5
; --- Group 2: same addresses as group 1, but the first result goes to
; xmm5 instead of xmm1. Presumably the alternative branch of a conditional
; on the macro parameter -- TODO confirm, the directive is not visible.
37 movdqa xmm0
, XMMWORD
PTR [rsi
]
38 movq xmm5
, MMWORD
PTR [rdi
]
39 movq xmm3
, MMWORD
PTR [rdi
+8]
40 movq xmm2
, MMWORD
PTR [rdi
+16]
45 mpsadbw xmm5
, xmm0
, 0x0
46 mpsadbw xmm2
, xmm0
, 0x5
51 mpsadbw xmm3
, xmm0
, 0x0
52 mpsadbw xmm4
, xmm0
, 0x5
; --- Group 3: next row, addressed as [rsi + rax] / [rdi + rdx] ---
; (rax and rdx are loaded with src_stride / ref_stride by the functions
; in this file.)
60 movdqa xmm0
, XMMWORD
PTR [rsi
+ rax
]
61 movq xmm5
, MMWORD
PTR [rdi
+ rdx
]
62 movq xmm3
, MMWORD
PTR [rdi
+ rdx
+8]
63 movq xmm2
, MMWORD
PTR [rdi
+ rdx
+16]
71 mpsadbw xmm5
, xmm0
, 0x0
72 mpsadbw xmm2
, xmm0
, 0x5
76 mpsadbw xmm3
, xmm0
, 0x0
77 mpsadbw xmm4
, xmm0
, 0x5
; PROCESS_8X2X8 <param>
; Macro taking one parameter. The visible instructions load 8 source bytes
; through rsi and 16 reference bytes through rdi (two 8-byte loads at +0
; and +8), then run MPSADBW with immediates 0x0 and 0x5 (source dword 0 /
; destination offset 0, and source dword 1 / destination offset 4).
;
; NOTE(review): this listing is incomplete. The fused original line
; numbers skip (90 -> 94, 95 -> 98, 105 -> 110, ...), no %if/%else/%endmacro
; or accumulation is visible, and xmm2 is used as an MPSADBW destination
; without ever being loaded in the visible lines. Recover the missing
; lines from the original file before assembling.
86 %macro PROCESS_8X2X8
1
; --- Group 1: row at [rsi] / [rdi]; first result goes to xmm1 ---
88 movq xmm0
, MMWORD
PTR [rsi
]
89 movq xmm1
, MMWORD
PTR [rdi
]
90 movq xmm3
, MMWORD
PTR [rdi
+8]
; NOTE(review): groups 2 and 3 merge the two reference halves with
; punpcklqdq; no such merge is visible in this group (lines 91-93 missing).
94 mpsadbw xmm1
, xmm0
, 0x0
95 mpsadbw xmm2
, xmm0
, 0x5
; --- Group 2: same addresses, first result goes to xmm5 instead of xmm1.
; Presumably the alternative branch of a conditional on the macro
; parameter -- TODO confirm, the directive is not visible.
98 movq xmm0
, MMWORD
PTR [rsi
]
99 movq xmm5
, MMWORD
PTR [rdi
]
100 movq xmm3
, MMWORD
PTR [rdi
+8]
; xmm5 = reference bytes 0-7 (low qword) : bytes 8-15 (high qword).
101 punpcklqdq xmm5
, xmm3
104 mpsadbw xmm5
, xmm0
, 0x0
105 mpsadbw xmm2
, xmm0
, 0x5
; --- Group 3: next row, addressed as [rsi + rax] / [rdi + rdx] ---
110 movq xmm0
, MMWORD
PTR [rsi
+ rax
]
111 movq xmm5
, MMWORD
PTR [rdi
+ rdx
]
112 movq xmm3
, MMWORD
PTR [rdi
+ rdx
+8]
; xmm5 = 16 contiguous reference bytes of the next row.
113 punpcklqdq xmm5
, xmm3
119 mpsadbw xmm5
, xmm0
, 0x0
120 mpsadbw xmm2
, xmm0
, 0x5
; PROCESS_4X2X8 <param>
; Macro taking one parameter. Each visible group builds 16 reference bytes
; from two 8-byte loads (+0 and +8) merged with punpcklqdq, then runs one
; MPSADBW with immediate 0x0 (source dword 0, destination offset 0)
; against xmm0.
;
; NOTE(review): this listing is incomplete. The fused original line
; numbers skip (126 -> 129, 133 -> 136, 140 -> 144, ...). The load of xmm0
; for the first two groups is not visible, and neither are
; %if/%else/%endmacro or any accumulation. Recover the missing lines from
; the original file before assembling.
126 %macro PROCESS_4X2X8
1
; --- Group 1: reference row at [rdi]; result goes to xmm1 ---
129 movq xmm1
, MMWORD
PTR [rdi
]
130 movq xmm3
, MMWORD
PTR [rdi
+8]
; xmm1 = reference bytes 0-7 (low qword) : bytes 8-15 (high qword).
131 punpcklqdq xmm1
, xmm3
133 mpsadbw xmm1
, xmm0
, 0x0
; --- Group 2: same reference address; result goes to xmm5 instead.
; Presumably the alternative branch of a conditional on the macro
; parameter -- TODO confirm, the directive is not visible.
136 movq xmm5
, MMWORD
PTR [rdi
]
137 movq xmm3
, MMWORD
PTR [rdi
+8]
138 punpcklqdq xmm5
, xmm3
140 mpsadbw xmm5
, xmm0
, 0x0
; --- Group 3: next row ---
; movd loads 4 source bytes from [rsi + rax] into the low dword of xmm0.
144 movd xmm0
, [rsi
+ rax
]
145 movq xmm5
, MMWORD
PTR [rdi
+ rdx
]
146 movq xmm3
, MMWORD
PTR [rdi
+ rdx
+8]
; xmm5 = 16 contiguous reference bytes of the next row.
147 punpcklqdq xmm5
, xmm3
152 mpsadbw xmm5
, xmm0
, 0x0
; vp8_sad16x16x8_sse4
; Register use, as read from the instructions below:
;   arg(0) src_ptr    -> rsi
;   arg(1) src_stride -> rax (dword, sign-extended to 64 bits)
;   arg(2) ref_ptr    -> rdi
;   arg(3) ref_stride -> rdx (dword, sign-extended to 64 bits)
;   arg(4) sad_array  -> rdi at the end; receives the 16 bytes of xmm1
; sym(), arg() and SHADOW_ARGS_TO_STACK are not defined in this file;
; presumably they come from vpx_ports/x86_abi_support.asm -- TODO confirm.
; NOTE(review): the prologue, the SAD computation that fills xmm1, the
; epilogue and ret are not visible in this listing (the fused original
; line numbers skip 177 -> 188).
158 ;void vp8_sad16x16x8_sse4(
159 ; const unsigned char *src_ptr,
;    int src_stride,
161 ; const unsigned char *ref_ptr,
;    int ref_stride,
163 ; unsigned short *sad_array);
164 global sym
(vp8_sad16x16x8_sse4
)
165 sym
(vp8_sad16x16x8_sse4
):
168 SHADOW_ARGS_TO_STACK
5
173 mov rsi
, arg
(0) ;src_ptr
174 mov rdi
, arg
(2) ;ref_ptr
176 movsxd rax
, dword ptr arg
(1) ;src_stride
177 movsxd rdx
, dword ptr arg
(3) ;ref_stride
; rdi is reused: from here on it holds the output pointer, not ref_ptr.
188 mov rdi
, arg
(4) ;Results
; Unaligned 16-byte store, so sad_array needs no particular alignment.
189 movdqu XMMWORD
PTR [rdi
], xmm1
; vp8_sad16x8x8_sse4
; Register use, as read from the instructions below:
;   arg(0) src_ptr    -> rsi
;   arg(1) src_stride -> rax (dword, sign-extended to 64 bits)
;   arg(2) ref_ptr    -> rdi
;   arg(3) ref_stride -> rdx (dword, sign-extended to 64 bits)
;   arg(4) sad_array  -> rdi at the end; receives the 16 bytes of xmm1
; sym(), arg() and SHADOW_ARGS_TO_STACK are not defined in this file;
; presumably they come from vpx_ports/x86_abi_support.asm -- TODO confirm.
; NOTE(review): the prologue, the SAD computation that fills xmm1, the
; epilogue and ret are not visible in this listing (the fused original
; line numbers skip 219 -> 226).
199 ;void vp8_sad16x8x8_sse4(
200 ; const unsigned char *src_ptr,
;    int src_stride,
202 ; const unsigned char *ref_ptr,
;    int ref_stride,
204 ; unsigned short *sad_array);
206 global sym
(vp8_sad16x8x8_sse4
)
207 sym
(vp8_sad16x8x8_sse4
):
210 SHADOW_ARGS_TO_STACK
5
215 mov rsi
, arg
(0) ;src_ptr
216 mov rdi
, arg
(2) ;ref_ptr
218 movsxd rax
, dword ptr arg
(1) ;src_stride
219 movsxd rdx
, dword ptr arg
(3) ;ref_stride
; rdi is reused: from here on it holds the output pointer, not ref_ptr.
226 mov rdi
, arg
(4) ;Results
; Unaligned 16-byte store, so sad_array needs no particular alignment.
227 movdqu XMMWORD
PTR [rdi
], xmm1
; vp8_sad8x8x8_sse4
; Register use, as read from the instructions below:
;   arg(0) src_ptr    -> rsi
;   arg(1) src_stride -> rax (dword, sign-extended to 64 bits)
;   arg(2) ref_ptr    -> rdi
;   arg(3) ref_stride -> rdx (dword, sign-extended to 64 bits)
;   arg(4) sad_array  -> rdi at the end; receives the 16 bytes of xmm1
; sym(), arg() and SHADOW_ARGS_TO_STACK are not defined in this file;
; presumably they come from vpx_ports/x86_abi_support.asm -- TODO confirm.
; NOTE(review): the prologue, the SAD computation that fills xmm1, the
; epilogue and ret are not visible in this listing (the fused original
; line numbers skip 257 -> 264).
237 ;void vp8_sad8x8x8_sse4(
238 ; const unsigned char *src_ptr,
;    int src_stride,
240 ; const unsigned char *ref_ptr,
;    int ref_stride,
242 ; unsigned short *sad_array);
244 global sym
(vp8_sad8x8x8_sse4
)
245 sym
(vp8_sad8x8x8_sse4
):
248 SHADOW_ARGS_TO_STACK
5
253 mov rsi
, arg
(0) ;src_ptr
254 mov rdi
, arg
(2) ;ref_ptr
256 movsxd rax
, dword ptr arg
(1) ;src_stride
257 movsxd rdx
, dword ptr arg
(3) ;ref_stride
; rdi is reused: from here on it holds the output pointer, not ref_ptr.
264 mov rdi
, arg
(4) ;Results
; Unaligned 16-byte store, so sad_array needs no particular alignment.
265 movdqu XMMWORD
PTR [rdi
], xmm1
; vp8_sad8x16x8_sse4
; Register use, as read from the instructions below:
;   arg(0) src_ptr    -> rsi
;   arg(1) src_stride -> rax (dword, sign-extended to 64 bits)
;   arg(2) ref_ptr    -> rdi
;   arg(3) ref_stride -> rdx (dword, sign-extended to 64 bits)
;   arg(4) sad_array  -> rdi at the end; receives the 16 bytes of xmm1
; sym(), arg() and SHADOW_ARGS_TO_STACK are not defined in this file;
; presumably they come from vpx_ports/x86_abi_support.asm -- TODO confirm.
; NOTE(review): the prologue, the SAD computation that fills xmm1, the
; epilogue and ret are not visible in this listing (the fused original
; line numbers skip 295 -> 305).
275 ;void vp8_sad8x16x8_sse4(
276 ; const unsigned char *src_ptr,
;    int src_stride,
278 ; const unsigned char *ref_ptr,
;    int ref_stride,
280 ; unsigned short *sad_array);
282 global sym
(vp8_sad8x16x8_sse4
)
283 sym
(vp8_sad8x16x8_sse4
):
286 SHADOW_ARGS_TO_STACK
5
291 mov rsi
, arg
(0) ;src_ptr
292 mov rdi
, arg
(2) ;ref_ptr
294 movsxd rax
, dword ptr arg
(1) ;src_stride
295 movsxd rdx
, dword ptr arg
(3) ;ref_stride
; rdi is reused: from here on it holds the output pointer, not ref_ptr.
305 mov rdi
, arg
(4) ;Results
; Unaligned 16-byte store, so sad_array needs no particular alignment.
306 movdqu XMMWORD
PTR [rdi
], xmm1
; vp8_sad4x4x8_sse4
; Register use, as read from the instructions below:
;   arg(0) src_ptr    -> rsi
;   arg(1) src_stride -> rax (dword, sign-extended to 64 bits)
;   arg(2) ref_ptr    -> rdi
;   arg(3) ref_stride -> rdx (dword, sign-extended to 64 bits)
;   arg(4) sad_array  -> rdi at the end; receives the 16 bytes of xmm1
; sym(), arg() and SHADOW_ARGS_TO_STACK are not defined in this file;
; presumably they come from vpx_ports/x86_abi_support.asm -- TODO confirm.
; NOTE(review): the prologue, the SAD computation that fills xmm1, the
; epilogue and ret are not visible in this listing (the fused original
; line numbers skip 336 -> 341).
316 ;void vp8_sad4x4x8_sse4(
317 ; const unsigned char *src_ptr,
;    int src_stride,
319 ; const unsigned char *ref_ptr,
;    int ref_stride,
321 ; unsigned short *sad_array);
323 global sym
(vp8_sad4x4x8_sse4
)
324 sym
(vp8_sad4x4x8_sse4
):
327 SHADOW_ARGS_TO_STACK
5
332 mov rsi
, arg
(0) ;src_ptr
333 mov rdi
, arg
(2) ;ref_ptr
335 movsxd rax
, dword ptr arg
(1) ;src_stride
336 movsxd rdx
, dword ptr arg
(3) ;ref_stride
; rdi is reused: from here on it holds the output pointer, not ref_ptr.
341 mov rdi
, arg
(4) ;Results
; Unaligned 16-byte store, so sad_array needs no particular alignment.
342 movdqu XMMWORD
PTR [rdi
], xmm1