2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
4 ; Use of this source code is governed by a BSD-style license
5 ; that can be found in the LICENSE file in the root of the source
6 ; tree. An additional intellectual property rights grant can be found
7 ; in the file PATENTS. All contributing project authors may
8 ; be found in the AUTHORS file in the root of the source tree.
12 %include "vpx_ports/x86_abi_support.asm"
14 ;void vp8_subtract_b_sse2_impl(unsigned char *z, int src_stride,
15 ; short *diff, unsigned char *Predictor,
; int pitch)   <- fifth parameter: arg(4) is loaded as "pitch" below and
;                 SHADOW_ARGS_TO_STACK is given 5.
;
; NOTE(review): this listing looks like an incomplete extraction. Each line
; starts with a leftover line number, instructions are split over several
; lines, and the numbering has gaps. The prologue/epilogue, the loads of
; rdi/rsi, and the loads/subtractions that produce mm0 before each store are
; not visible here; confirm against the complete file before changing anything.
;
; Visible behaviour:
;   rax = arg(3) (Predictor)
;   rdx = arg(1) (src_stride), sign-extended from 32 bits
;   rcx = arg(4) (pitch), sign-extended from 32 bits
;   four 8-byte (MMWORD) stores of mm0 through rdi
; rdi is presumably the diff pointer (arg(2)) - TODO confirm; its load is not
; shown in this view.
17 global sym
(vp8_subtract_b_sse2_impl
)
18 sym
(vp8_subtract_b_sse2_impl
):
21 SHADOW_ARGS_TO_STACK
5
28 mov rax
, arg
(3) ;Predictor
30 movsxd rdx
, dword ptr arg
(1);src_stride;
31 movsxd rcx
, dword ptr arg
(4);pitch
; Store 1: 8 bytes at [rdi].
39 movq MMWORD
PTR [rdi
], mm0
; Store 2: 8 bytes at [rdi + rcx*2]. The *2 presumably converts a pitch counted
; in 16-bit elements into bytes - TODO confirm.
46 movq MMWORD
PTR [rdi
+rcx
*2], mm0
; Store 3: 8 bytes at [rdi + rcx*4].
53 movq MMWORD
PTR [rdi
+rcx
*4], mm0
; Store 4: same addressing form as store 2. NOTE(review): rcx (or rdi) is
; presumably adjusted in the lines missing between 53 and 63, otherwise this
; would overwrite store 2 - verify against the complete file.
63 movq MMWORD
PTR [rdi
+rcx
*2], mm0
74 ;void vp8_subtract_mby_sse2(short *diff, unsigned char *src, unsigned char *pred, int stride)
;
; NOTE(review): this listing looks like an incomplete extraction (leftover line
; numbers, split instructions, gaps in the numbering). The prologue/epilogue,
; the loads of rdi/rsi/rax, the loop label and counter update, the psubb
; subtractions and the register copies feeding xmm2/xmm3/xmm6/xmm7 are not
; visible here; confirm against the complete file.
;
; Visible behaviour per pass:
;   - 16 aligned bytes are read from [rsi] and from [rsi + rdx] (rdx = stride),
;     i.e. two consecutive src rows.
;   - 16 aligned bytes of prediction are read from [rax] and from [rax + 16].
;   - Four aligned 16-byte stores go to [rdi], [rdi+16], [rdi+32], [rdi+48].
;
; Sign technique used by the visible instructions:
;   both operands are XORed with t80, then compared with the signed pcmpgtb;
;   the resulting 0x00/0xFF mask bytes are interleaved with the data bytes by
;   punpcklbw/punpckhbw so each byte becomes a 16-bit word with the mask as its
;   high byte. t80 is defined outside this view - presumably 0x80 in every
;   byte; TODO confirm.
75 global sym
(vp8_subtract_mby_sse2
)
76 sym
(vp8_subtract_mby_sse2
):
79 SHADOW_ARGS_TO_STACK
4
90 movsxd rdx
, dword ptr arg
(3) ;stride
; rcx = 8: pass counter per the existing comment; the loop label and the
; decrement/branch are not visible in this view.
92 mov rcx
, 8 ; do two lines at one time
; ---- first row of the pair ----
95 movdqa xmm0
, XMMWORD
PTR [rsi
] ; src
96 movdqa xmm1
, XMMWORD
PTR [rax
] ; pred
; xmm2 is expected to hold a copy of the src bytes here (copy not visible).
101 pxor xmm1
, [GLOBAL(t80
)] ;convert to signed values
102 pxor xmm2
, [GLOBAL(t80
)]
; xmm1 byte = 0xFF where biased pred > biased xmm2, else 0x00.
103 pcmpgtb xmm1
, xmm2
; obtain sign information
; Low 8 bytes of xmm0 interleaved with the mask bytes -> 8 words.
107 punpcklbw xmm0
, xmm1
; put sign back to subtraction
; High 8 bytes of xmm2 interleaved with xmm3; both are set up in lines not
; visible here.
108 punpckhbw xmm2
, xmm3
; put sign back to subtraction
110 movdqa XMMWORD
PTR [rdi
], xmm0
111 movdqa XMMWORD
PTR [rdi
+16], xmm2
; ---- second row of the pair: src one stride further, pred 16 bytes further ----
113 movdqa xmm4
, XMMWORD
PTR [rsi
+ rdx
]
114 movdqa xmm5
, XMMWORD
PTR [rax
+ 16]
; Same bias/compare/interleave sequence as above on xmm4..xmm7.
119 pxor xmm5
, [GLOBAL(t80
)] ;convert to signed values
120 pxor xmm6
, [GLOBAL(t80
)]
121 pcmpgtb xmm5
, xmm6
; obtain sign information
125 punpcklbw xmm4
, xmm5
; put sign back to subtraction
126 punpckhbw xmm6
, xmm7
; put sign back to subtraction
128 movdqa XMMWORD
PTR [rdi
+32], xmm4
129 movdqa XMMWORD
PTR [rdi
+48], xmm6
148 ;void vp8_subtract_mbuv_sse2(short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride)
;
; NOTE(review): this listing looks like an incomplete extraction (leftover line
; numbers, split instructions, gaps in the numbering). The prologue/epilogue
; and the register copies that set up xmm2/xmm3 before each compare/unpack are
; not visible here; confirm against the complete file.
;
; Visible behaviour:
;   rdi = diff + 256 shorts (512 bytes), rax = pred + 256 bytes, rsi = usrc,
;   rdx = stride (sign-extended), rcx = 3 * stride.
;   Eight 8-byte rows are read from usrc, two rows per group, against 16
;   prediction bytes per group at rax+0/16/32/48; each group stores 32 bytes
;   through rdi. Then rsi = vsrc, rdi advances 128 bytes, rax advances 64
;   bytes, and the same four groups are repeated.
;
; Sign technique used by the visible instructions:
;   psubb produces byte differences; both operands are XORed with t80 and
;   compared with the signed pcmpgtb; the 0x00/0xFF mask bytes are interleaved
;   with the data bytes by punpcklbw/punpckhbw so each byte becomes a 16-bit
;   word with the mask as its high byte. t80 is defined outside this view -
;   presumably 0x80 in every byte; TODO confirm.
149 global sym
(vp8_subtract_mbuv_sse2
)
150 sym
(vp8_subtract_mbuv_sse2
):
153 SHADOW_ARGS_TO_STACK
5
159 mov rdi
, arg
(0) ;diff
160 mov rax
, arg
(3) ;pred
161 mov rsi
, arg
(1) ;z = usrc
162 add rdi
, 256*2 ;diff = diff + 256 (shorts)
163 add rax
, 256 ;Predictor = pred + 256
164 movsxd rdx
, dword ptr arg
(4) ;stride;
; rcx = 3 * stride, used to address the fourth row of each block of four.
165 lea rcx
, [rdx
+ rdx
*2]
; ---- usrc rows 0 and 1: two 8-byte loads packed into one 16-byte register ----
169 movq xmm0
, MMWORD
PTR [rsi
] ; src
170 movq xmm2
, MMWORD
PTR [rsi
+rdx
]
171 movdqa xmm1
, XMMWORD
PTR [rax
] ; pred
172 punpcklqdq xmm0
, xmm2
; xmm2 is expected to hold a copy of the packed src bytes from here on (copy
; not visible in this view).
175 psubb xmm0
, xmm1
; subtraction with sign missed
177 pxor xmm1
, [GLOBAL(t80
)] ;convert to signed values
178 pxor xmm2
, [GLOBAL(t80
)]
179 pcmpgtb xmm1
, xmm2
; obtain sign information
183 punpcklbw xmm0
, xmm1
; put sign back to subtraction
184 punpckhbw xmm2
, xmm3
; put sign back to subtraction
186 movdqa XMMWORD
PTR [rdi
], xmm0
187 movdqa XMMWORD
PTR [rdi
+16], xmm2
; ---- usrc rows 2 and 3 (offsets 2*stride and 3*stride), pred at rax+16 ----
190 movq xmm0
, MMWORD
PTR [rsi
+rdx
*2] ; src
191 movq xmm2
, MMWORD
PTR [rsi
+rcx
]
192 movdqa xmm1
, XMMWORD
PTR [rax
+16] ; pred
193 punpcklqdq xmm0
, xmm2
196 psubb xmm0
, xmm1
; subtraction with sign missed
198 pxor xmm1
, [GLOBAL(t80
)] ;convert to signed values
199 pxor xmm2
, [GLOBAL(t80
)]
200 pcmpgtb xmm1
, xmm2
; obtain sign information
204 punpcklbw xmm0
, xmm1
; put sign back to subtraction
205 punpckhbw xmm2
, xmm3
; put sign back to subtraction
207 movdqa XMMWORD
PTR [rdi
+ 32], xmm0
208 movdqa XMMWORD
PTR [rdi
+ 48], xmm2
; Advance the source pointer by four rows.
211 lea rsi
, [rsi
+ rdx
*4]
; ---- usrc rows 4 and 5, pred at rax+32 ----
213 movq xmm0
, MMWORD
PTR [rsi
] ; src
214 movq xmm2
, MMWORD
PTR [rsi
+rdx
]
215 movdqa xmm1
, XMMWORD
PTR [rax
+ 32] ; pred
216 punpcklqdq xmm0
, xmm2
219 psubb xmm0
, xmm1
; subtraction with sign missed
221 pxor xmm1
, [GLOBAL(t80
)] ;convert to signed values
222 pxor xmm2
, [GLOBAL(t80
)]
223 pcmpgtb xmm1
, xmm2
; obtain sign information
227 punpcklbw xmm0
, xmm1
; put sign back to subtraction
228 punpckhbw xmm2
, xmm3
; put sign back to subtraction
230 movdqa XMMWORD
PTR [rdi
+ 64], xmm0
231 movdqa XMMWORD
PTR [rdi
+ 80], xmm2
; ---- usrc rows 6 and 7, pred at rax+48 ----
234 movq xmm0
, MMWORD
PTR [rsi
+rdx
*2] ; src
235 movq xmm2
, MMWORD
PTR [rsi
+rcx
]
236 movdqa xmm1
, XMMWORD
PTR [rax
+ 48] ; pred
237 punpcklqdq xmm0
, xmm2
240 psubb xmm0
, xmm1
; subtraction with sign missed
242 pxor xmm1
, [GLOBAL(t80
)] ;convert to signed values
243 pxor xmm2
, [GLOBAL(t80
)]
244 pcmpgtb xmm1
, xmm2
; obtain sign information
248 punpcklbw xmm0
, xmm1
; put sign back to subtraction
249 punpckhbw xmm2
, xmm3
; put sign back to subtraction
251 movdqa XMMWORD
PTR [rdi
+ 96], xmm0
252 movdqa XMMWORD
PTR [rdi
+ 112], xmm2
; ==== switch to vsrc ====
; The "+ 320" in the two comments below is the cumulative offset from the
; original diff/pred arguments: 256 from the adds at the top plus 64 here.
255 mov rsi
, arg
(2) ;z = vsrc
256 add rdi
, 64*2 ;diff = diff + 320 (shorts)
257 add rax
, 64 ;Predictor = pred + 320
; ---- vsrc rows 0 and 1, pred at rax+0 ----
260 movq xmm0
, MMWORD
PTR [rsi
] ; src
261 movq xmm2
, MMWORD
PTR [rsi
+rdx
]
262 movdqa xmm1
, XMMWORD
PTR [rax
] ; pred
263 punpcklqdq xmm0
, xmm2
266 psubb xmm0
, xmm1
; subtraction with sign missed
268 pxor xmm1
, [GLOBAL(t80
)] ;convert to signed values
269 pxor xmm2
, [GLOBAL(t80
)]
270 pcmpgtb xmm1
, xmm2
; obtain sign information
274 punpcklbw xmm0
, xmm1
; put sign back to subtraction
275 punpckhbw xmm2
, xmm3
; put sign back to subtraction
277 movdqa XMMWORD
PTR [rdi
], xmm0
278 movdqa XMMWORD
PTR [rdi
+16], xmm2
; ---- vsrc rows 2 and 3, pred at rax+16 ----
281 movq xmm0
, MMWORD
PTR [rsi
+rdx
*2] ; src
282 movq xmm2
, MMWORD
PTR [rsi
+rcx
]
283 movdqa xmm1
, XMMWORD
PTR [rax
+16] ; pred
284 punpcklqdq xmm0
, xmm2
287 psubb xmm0
, xmm1
; subtraction with sign missed
289 pxor xmm1
, [GLOBAL(t80
)] ;convert to signed values
290 pxor xmm2
, [GLOBAL(t80
)]
291 pcmpgtb xmm1
, xmm2
; obtain sign information
295 punpcklbw xmm0
, xmm1
; put sign back to subtraction
296 punpckhbw xmm2
, xmm3
; put sign back to subtraction
298 movdqa XMMWORD
PTR [rdi
+ 32], xmm0
299 movdqa XMMWORD
PTR [rdi
+ 48], xmm2
; Advance the source pointer by four rows.
302 lea rsi
, [rsi
+ rdx
*4]
; ---- vsrc rows 4 and 5, pred at rax+32 ----
304 movq xmm0
, MMWORD
PTR [rsi
] ; src
305 movq xmm2
, MMWORD
PTR [rsi
+rdx
]
306 movdqa xmm1
, XMMWORD
PTR [rax
+ 32] ; pred
307 punpcklqdq xmm0
, xmm2
310 psubb xmm0
, xmm1
; subtraction with sign missed
312 pxor xmm1
, [GLOBAL(t80
)] ;convert to signed values
313 pxor xmm2
, [GLOBAL(t80
)]
314 pcmpgtb xmm1
, xmm2
; obtain sign information
318 punpcklbw xmm0
, xmm1
; put sign back to subtraction
319 punpckhbw xmm2
, xmm3
; put sign back to subtraction
321 movdqa XMMWORD
PTR [rdi
+ 64], xmm0
322 movdqa XMMWORD
PTR [rdi
+ 80], xmm2
; ---- vsrc rows 6 and 7, pred at rax+48 ----
325 movq xmm0
, MMWORD
PTR [rsi
+rdx
*2] ; src
326 movq xmm2
, MMWORD
PTR [rsi
+rcx
]
327 movdqa xmm1
, XMMWORD
PTR [rax
+ 48] ; pred
328 punpcklqdq xmm0
, xmm2
331 psubb xmm0
, xmm1
; subtraction with sign missed
333 pxor xmm1
, [GLOBAL(t80
)] ;convert to signed values
334 pxor xmm2
, [GLOBAL(t80
)]
335 pcmpgtb xmm1
, xmm2
; obtain sign information
339 punpcklbw xmm0
, xmm1
; put sign back to subtraction
340 punpckhbw xmm2
, xmm3
; put sign back to subtraction
342 movdqa XMMWORD
PTR [rdi
+ 96], xmm0
343 movdqa XMMWORD
PTR [rdi
+ 112], xmm2