Removed unused vp8_recon_intra4x4mb function
[libvpx.git] / vp8 / encoder / x86 / subtract_sse2.asm
blob3fb23d0975a99b0585d52018e2e4f928cda99891
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
12 %include "vpx_ports/x86_abi_support.asm"
;void vp8_subtract_b_sse2_impl(unsigned char *z, int src_stride,
;                              short *diff, unsigned char *Predictor,
;                              int pitch);
;
; Subtracts a 4x4 block of predictor bytes from a 4x4 block of source bytes
; and stores the 16 results as signed 16-bit words:
;
;     diff[r*pitch + c] = z[r*src_stride + c] - Predictor[r*pitch + c]
;
; for r, c in 0..3.  `pitch` is used as the row stride of both Predictor
; (in bytes) and diff (in shorts, hence the *2 scaling on rdi offsets).
;
; Registers: rsi = z, rax = Predictor, rdi = diff, rdx = src_stride,
;            rcx = pitch (later 3*pitch), mm7 = 0 (zero-extension source).
; Clobbers:  rax, rcx, rdx, mm0, mm1, mm7.
;
; Despite the _sse2 name the body only uses MMX registers, and it does not
; execute emms before returning.
; NOTE(review): presumably callers reset the MMX/x87 state themselves - confirm.
global sym(vp8_subtract_b_sse2_impl)
sym(vp8_subtract_b_sse2_impl):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 5
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

    mov         rdi, arg(2)                     ;diff
    mov         rax, arg(3)                     ;Predictor
    mov         rsi, arg(0)                     ;z
    movsxd      rdx, dword ptr arg(1)           ;src_stride
    movsxd      rcx, dword ptr arg(4)           ;pitch
    pxor        mm7, mm7                        ; mm7 = 0, used to widen bytes to words

    ; row 0
    movd        mm0, [rsi]                      ; 4 source bytes
    movd        mm1, [rax]                      ; 4 predictor bytes
    punpcklbw   mm0, mm7                        ; zero-extend to 4 words
    punpcklbw   mm1, mm7
    psubw       mm0, mm1                        ; src - pred
    movq        MMWORD PTR [rdi], mm0

    ; row 1
    movd        mm0, [rsi+rdx]
    movd        mm1, [rax+rcx]
    punpcklbw   mm0, mm7
    punpcklbw   mm1, mm7
    psubw       mm0, mm1
    movq        MMWORD PTR [rdi+rcx*2], mm0     ; diff + 1*pitch shorts

    ; row 2
    movd        mm0, [rsi+rdx*2]
    movd        mm1, [rax+rcx*2]
    punpcklbw   mm0, mm7
    punpcklbw   mm1, mm7
    psubw       mm0, mm1
    movq        MMWORD PTR [rdi+rcx*4], mm0     ; diff + 2*pitch shorts

    ; x86 addressing has no *3 scale, so pre-compute the row-3 offsets:
    lea         rsi, [rsi+rdx*2]                ; rsi = z + 2*src_stride
    lea         rcx, [rcx+rcx*2]                ; rcx = 3*pitch

    ; row 3
    movd        mm0, [rsi+rdx]                  ; z + 3*src_stride
    movd        mm1, [rax+rcx]                  ; Predictor + 3*pitch
    punpcklbw   mm0, mm7
    punpcklbw   mm1, mm7
    psubw       mm0, mm1
    movq        MMWORD PTR [rdi+rcx*2], mm0     ; diff + 3*pitch shorts

    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_GOT
    UNSHADOW_ARGS
    pop         rbp
    ret
;void vp8_subtract_mby_sse2(short *diff, unsigned char *src, unsigned char *pred, int stride)
;
; Computes the 16x16 residual src - pred as signed 16-bit words.
;   src  : 16 rows of 16 bytes, `stride` bytes apart.
;   pred : 16 rows of 16 bytes, stored contiguously (16 bytes per row).
;   diff : 16 rows of 16 shorts, stored contiguously (32 bytes per row).
;
; Method: psubb yields the low 8 bits of each difference.  The sign is
; recovered separately: both operands are XORed with 0x80 (t80) so that the
; signed byte compare pcmpgtb orders them like unsigned values, giving 0xFF
; where pred > src and 0x00 otherwise.  Interleaving the low byte with that
; mask produces the sign-extended 16-bit difference.
;
; All loads and stores use movdqa, so src, src + stride, pred and diff must
; be 16-byte aligned.
; Clobbers: rax, rcx, rdx, xmm0-xmm5 (xmm6/xmm7 are bracketed by
; SAVE_XMM / RESTORE_XMM).
global sym(vp8_subtract_mby_sse2)
sym(vp8_subtract_mby_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 4
    SAVE_XMM
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

    mov         rsi, arg(1)                     ;src
    mov         rdi, arg(0)                     ;diff

    mov         rax, arg(2)                     ;pred
    movsxd      rdx, dword ptr arg(3)           ;stride

    mov         rcx, 8                          ; do two lines at one time

.submby_loop:
    ; ---- first row of the pair ----
    movdqa      xmm0, XMMWORD PTR [rsi]         ; src
    movdqa      xmm1, XMMWORD PTR [rax]         ; pred

    movdqa      xmm2, xmm0
    psubb       xmm0, xmm1                      ; low 8 bits of src - pred

    pxor        xmm1, [GLOBAL(t80)]             ;convert to signed values
    pxor        xmm2, [GLOBAL(t80)]
    pcmpgtb     xmm1, xmm2                      ; obtain sign information

    movdqa      xmm2, xmm0
    movdqa      xmm3, xmm1
    punpcklbw   xmm0, xmm1                      ; put sign back to subtraction
    punpckhbw   xmm2, xmm3                      ; put sign back to subtraction

    movdqa      XMMWORD PTR [rdi], xmm0
    movdqa      XMMWORD PTR [rdi +16], xmm2

    ; ---- second row of the pair ----
    movdqa      xmm4, XMMWORD PTR [rsi + rdx]   ; src, next row
    movdqa      xmm5, XMMWORD PTR [rax + 16]    ; pred, next row

    movdqa      xmm6, xmm4
    psubb       xmm4, xmm5                      ; low 8 bits of src - pred

    pxor        xmm5, [GLOBAL(t80)]             ;convert to signed values
    pxor        xmm6, [GLOBAL(t80)]
    pcmpgtb     xmm5, xmm6                      ; obtain sign information

    movdqa      xmm6, xmm4
    movdqa      xmm7, xmm5
    punpcklbw   xmm4, xmm5                      ; put sign back to subtraction
    punpckhbw   xmm6, xmm7                      ; put sign back to subtraction

    movdqa      XMMWORD PTR [rdi +32], xmm4
    movdqa      XMMWORD PTR [rdi +48], xmm6

    add         rdi, 64                         ; two rows of 16 shorts
    add         rax, 32                         ; two rows of 16 predictor bytes
    lea         rsi, [rsi+rdx*2]                ; two source rows

    sub         rcx, 1
    jnz         .submby_loop

    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_GOT
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret                                         ; without this, control falls through into the next function
; SUBTRACT_UV_ROW_PAIR src_row_a, src_row_b, pred, diff
;
; Subtracts two 8-pixel rows.  All four arguments are address expressions
; (the text that goes between the brackets):
;   %1, %2 : the two 8-byte source rows (loaded with movq, no alignment needed)
;   %3     : 16 predictor bytes covering both rows (movdqa, 16-byte aligned)
;   %4     : destination for 16 shorts = 32 bytes (movdqa, 16-byte aligned)
;
; psubb gives the low 8 bits of src - pred.  XORing both operands with 0x80
; (t80) lets the signed compare pcmpgtb act as an unsigned "pred > src" test,
; producing 0xFF where the difference is negative; interleaving that mask as
; the high byte yields the sign-extended 16-bit result.
; Clobbers xmm0, xmm1, xmm2.
%macro SUBTRACT_UV_ROW_PAIR 4
    movq        xmm0, MMWORD PTR [%1]           ; src, first row
    movq        xmm2, MMWORD PTR [%2]           ; src, second row
    movdqa      xmm1, XMMWORD PTR [%3]          ; pred, both rows
    punpcklqdq  xmm0, xmm2                      ; xmm0 = 16 source bytes

    movdqa      xmm2, xmm0
    psubb       xmm0, xmm1                      ; subtraction with sign missed

    pxor        xmm1, [GLOBAL(t80)]             ;convert to signed values
    pxor        xmm2, [GLOBAL(t80)]
    pcmpgtb     xmm1, xmm2                      ; obtain sign information

    movdqa      xmm2, xmm0
    punpcklbw   xmm0, xmm1                      ; put sign back to subtraction
    punpckhbw   xmm2, xmm1                      ; xmm1 is not modified by the line above

    movdqa      XMMWORD PTR [%4], xmm0
    movdqa      XMMWORD PTR [%4 + 16], xmm2
%endmacro

;void vp8_subtract_mbuv_sse2(short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride)
;
; Computes the two 8x8 chroma residuals as signed 16-bit words.
;   usrc, vsrc : 8 rows of 8 bytes each, `stride` bytes apart.
;   pred       : U predictor read from pred + 256, V from pred + 320,
;                8 bytes per row, rows contiguous.
;   diff       : U result written at diff + 256 shorts, V at diff + 320
;                shorts, 8 shorts per row, rows contiguous.
;
; Registers: rsi = current source plane, rax = predictor, rdi = diff,
;            rdx = stride, rcx = 3*stride.
; Clobbers:  rax, rcx, rdx, xmm0-xmm2.
global sym(vp8_subtract_mbuv_sse2)
sym(vp8_subtract_mbuv_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 5
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

    mov         rdi, arg(0)                     ;diff
    mov         rax, arg(3)                     ;pred
    mov         rsi, arg(1)                     ;z = usrc
    add         rdi, 256*2                      ;diff = diff + 256 (shorts)
    add         rax, 256                        ;Predictor = pred + 256
    movsxd      rdx, dword ptr arg(4)           ;stride
    lea         rcx, [rdx + rdx*2]              ; rcx = 3*stride

    ; ---------------- U plane ----------------
    ;line 0 1
    SUBTRACT_UV_ROW_PAIR rsi, rsi + rdx, rax, rdi
    ;line 2 3
    SUBTRACT_UV_ROW_PAIR rsi + rdx*2, rsi + rcx, rax + 16, rdi + 32

    lea         rsi, [rsi + rdx*4]              ; advance to row 4

    ;line 4 5
    SUBTRACT_UV_ROW_PAIR rsi, rsi + rdx, rax + 32, rdi + 64
    ;line 6 7
    SUBTRACT_UV_ROW_PAIR rsi + rdx*2, rsi + rcx, rax + 48, rdi + 96

    ; ---------------- V plane ----------------
    mov         rsi, arg(2)                     ;z = vsrc
    add         rdi, 64*2                       ;diff = diff + 320 (shorts)
    add         rax, 64                         ;Predictor = pred + 320

    ;line 0 1
    SUBTRACT_UV_ROW_PAIR rsi, rsi + rdx, rax, rdi
    ;line 2 3
    SUBTRACT_UV_ROW_PAIR rsi + rdx*2, rsi + rcx, rax + 16, rdi + 32

    lea         rsi, [rsi + rdx*4]              ; advance to row 4

    ;line 4 5
    SUBTRACT_UV_ROW_PAIR rsi, rsi + rdx, rax + 32, rdi + 64
    ;line 6 7
    SUBTRACT_UV_ROW_PAIR rsi + rdx*2, rsi + rcx, rax + 48, rdi + 96

    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_GOT
    UNSHADOW_ARGS
    pop         rbp
    ret                                         ; without this, control runs off the end of the function
SECTION_RODATA
align 16
; Sixteen 0x80 bytes.  XORing a vector of unsigned bytes with this flips
; each sign bit, so that the signed compare pcmpgtb orders the bytes as if
; they were unsigned.  Kept 16-byte aligned because it is used directly as
; a memory operand of pxor.
t80:
    times 16 db 0x80