2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
4 ; Use of this source code is governed by a BSD-style license
5 ; that can be found in the LICENSE file in the root of the source
6 ; tree. An additional intellectual property rights grant can be found
7 ; in the file PATENTS. All contributing project authors may
8 ; be found in the AUTHORS file in the root of the source tree.
12 %include "vpx_ports/x86_abi_support.asm"
; ---------------------------------------------------------------------------
; vp8_recon2b_sse2
;
; What the visible statements do, four times over (one group per output row):
;   1. movq     xmmN, [rsi + 8*row]   - load 8 bytes
;   2. paddsw   xmmN, [rdx + 16*row]  - signed-saturating add of eight 16-bit
;                                       words read from memory
;   3. packuswb xmmN, xmm0            - narrow the eight words to bytes with
;                                       unsigned saturation (low 8 bytes of xmmN)
;   4. movq     [rdi + ...], xmmN     - store those 8 bytes
; rax holds the 4th argument (stride), sign-extended from 32 to 64 bits.
;
; NOTE(review): this listing looks like a lossy extraction. Every statement
; carries a leading number and those numbers skip (26, 29, 31, 32, 33, 36, ...),
; so statements that belong between the visible ones are probably missing.
; In particular, nothing visible here
;   - loads rsi, rdx or rdi (presumably s, q and d - confirm against the
;     complete file),
;   - clears xmm0 before it is used as the packuswb source,
;   - widens the loaded bytes to words before paddsw (the existing comment
;     says "pack and unpack", but no unpack is visible),
;   - restores state or returns (no epilogue / ret).
; Do not assemble or "fix" this fragment without the complete source.
; ---------------------------------------------------------------------------
13 ;void vp8_recon2b_sse2(unsigned char *s, short *q, unsigned char *d, int stride)
14 global sym
(vp8_recon2b_sse2
)
15 sym
(vp8_recon2b_sse2
):
; SHADOW_ARGS_TO_STACK comes from the included x86_abi_support.asm; its exact
; effect is not visible here (presumably makes arg(n) addressable - confirm).
18 SHADOW_ARGS_TO_STACK
4
; rax = (int64) stride; sign extension keeps a negative stride valid as an
; address offset.
26 movsxd rax
, dword ptr arg
(3) ;stride
; ---- row 0: 8 bytes from [rsi], words from [rdx], stored at [rdi] ----
29 movq xmm1
, MMWORD
PTR [rsi
]
31 paddsw xmm1
, XMMWORD
PTR [rdx
]
32 packuswb xmm1
, xmm0
; pack and unpack to saturate
33 movq MMWORD
PTR [rdi
], xmm1
; ---- row 1: 8 bytes from [rsi+8], words from [rdx+16], stored at [rdi+rax] ----
36 movq xmm2
, MMWORD
PTR [rsi
+8]
38 paddsw xmm2
, XMMWORD
PTR [rdx
+16]
39 packuswb xmm2
, xmm0
; pack and unpack to saturate
40 movq MMWORD
PTR [rdi
+rax
], xmm2
; ---- row 2: 8 bytes from [rsi+16], words from [rdx+32], stored at [rdi+rax*2] ----
43 movq xmm3
, MMWORD
PTR [rsi
+16]
45 paddsw xmm3
, XMMWORD
PTR [rdx
+32]
46 packuswb xmm3
, xmm0
; pack and unpack to saturate
47 movq MMWORD
PTR [rdi
+rax
*2], xmm3
; ---- row 3: 8 bytes from [rsi+24], words from [rdx+48] ----
; NOTE(review): the store below uses the same address expression, [rdi+rax*2],
; as the row-2 store, and no change to rdi is visible in between. As listed it
; would overwrite row 2; presumably a pointer adjustment sits in one of the
; missing statements - verify against the complete file.
50 movq xmm4
, MMWORD
PTR [rsi
+24]
52 paddsw xmm4
, XMMWORD
PTR [rdx
+48]
53 packuswb xmm4
, xmm0
; pack and unpack to saturate
54 movq MMWORD
PTR [rdi
+rax
*2], xmm4
; ---------------------------------------------------------------------------
; vp8_recon4b_sse2
;
; What the visible statements do, four times over (one group per output row):
;   1. movdqa   xmmA, [rsi + 16*row]       - aligned 16-byte load
;   2. paddsw   xmmA, [rdx + 32*row]       - signed-saturating word add
;      paddsw   xmmB, [rdx + 32*row + 16]  - same, for a second register
;   3. packuswb xmmA, xmmB                 - narrow both sets of eight words to
;                                            sixteen unsigned-saturated bytes
;   4. movdqa   [rdi + ...], xmmA          - aligned 16-byte store
; rax holds the 4th argument (stride), sign-extended from 32 to 64 bits.
;
; Alignment: movdqa and the memory operands of (non-VEX) paddsw fault unless
; the address is 16-byte aligned, so every [rsi+..], [rdx+..] and [rdi+..]
; used below must be 16-byte aligned.
;
; NOTE(review): this listing looks like a lossy extraction. Every statement
; carries a leading number and those numbers skip (78, 81, 85, 86, 87, ...),
; so statements that belong between the visible ones are probably missing.
; In particular, nothing visible here
;   - loads rsi, rdx or rdi (presumably s, q and d - confirm against the
;     complete file),
;   - initialises xmm5 / xmm6 / xmm7 before they are used as paddsw
;     destinations,
;   - widens the loaded bytes to words before paddsw,
;   - restores state or returns (no epilogue / ret).
; Do not assemble or "fix" this fragment without the complete source.
; ---------------------------------------------------------------------------
64 ;void vp8_recon4b_sse2(unsigned char *s, short *q, unsigned char *d, int stride)
65 global sym
(vp8_recon4b_sse2
)
66 sym
(vp8_recon4b_sse2
):
; SHADOW_ARGS_TO_STACK comes from the included x86_abi_support.asm; its exact
; effect is not visible here (presumably makes arg(n) addressable - confirm).
69 SHADOW_ARGS_TO_STACK
4
; rax = (int64) stride; sign extension keeps a negative stride valid as an
; address offset.
78 movsxd rax
, dword ptr arg
(3) ;stride
; ---- row 0: 16 bytes from [rsi], words from [rdx] / [rdx+16], stored at [rdi] ----
81 movdqa xmm1
, XMMWORD
PTR [rsi
]
85 paddsw xmm1
, XMMWORD
PTR [rdx
]
; xmm5 has no visible initialisation before this add (see note at top).
86 paddsw xmm5
, XMMWORD
PTR [rdx
+16]
87 packuswb xmm1
, xmm5
; pack and unpack to saturate
88 movdqa XMMWORD
PTR [rdi
], xmm1
; ---- row 1: 16 bytes from [rsi+16], words from [rdx+32] / [rdx+48], stored at [rdi+rax] ----
91 movdqa xmm2
, XMMWORD
PTR [rsi
+16]
95 paddsw xmm2
, XMMWORD
PTR [rdx
+32]
; xmm6 has no visible initialisation before this add (see note at top).
96 paddsw xmm6
, XMMWORD
PTR [rdx
+48]
97 packuswb xmm2
, xmm6
; pack and unpack to saturate
98 movdqa XMMWORD
PTR [rdi
+rax
], xmm2
; ---- row 2: 16 bytes from [rsi+32], words from [rdx+64] / [rdx+80], stored at [rdi+rax*2] ----
101 movdqa xmm3
, XMMWORD
PTR [rsi
+32]
105 paddsw xmm3
, XMMWORD
PTR [rdx
+64]
; xmm7 has no visible initialisation before this add (see note at top).
106 paddsw xmm7
, XMMWORD
PTR [rdx
+80]
107 packuswb xmm3
, xmm7
; pack and unpack to saturate
108 movdqa XMMWORD
PTR [rdi
+rax
*2], xmm3
; ---- row 3: 16 bytes from [rsi+48], words from [rdx+96] / [rdx+112] ----
; NOTE(review): the store below uses the same address expression, [rdi+rax*2],
; as the row-2 store, and no change to rdi is visible in between. As listed it
; would overwrite row 2; presumably a pointer adjustment sits in one of the
; missing statements - verify against the complete file.
111 movdqa xmm4
, XMMWORD
PTR [rsi
+48]
115 paddsw xmm4
, XMMWORD
PTR [rdx
+96]
; xmm5 is reused here; as listed it still holds the row-0 sum unless a missing
; statement reloads it.
116 paddsw xmm5
, XMMWORD
PTR [rdx
+112]
117 packuswb xmm4
, xmm5
; pack and unpack to saturate
118 movdqa XMMWORD
PTR [rdi
+rax
*2], xmm4
; ---------------------------------------------------------------------------
; vp8_copy_mem16x16_sse2
;
; Copies 16-byte rows from a source buffer to a destination buffer using
; straight-line (unrolled) SSE2 loads and stores.
;
; Arguments as read by the code below:
;   arg(0) -> rsi : src
;   arg(1) -> rax : src_stride, 32-bit value sign-extended to 64 bits
;   arg(2) -> rdi : dst
;   arg(3) -> rcx : dst_stride, 32-bit value sign-extended to 64 bits
;
; Alignment: loads use movdqu, so src rows may be unaligned. Stores use movdqa,
; which faults unless the address is 16-byte aligned, so dst + dst_stride and
; dst + 2*dst_stride must be 16-byte aligned.
;
; NOTE(review): the prototype comment below names the function
; copy_mem16x16_sse2 and lists only src and dst, while the exported symbol is
; vp8_copy_mem16x16_sse2 and the code reads four arguments.
;
; NOTE(review): this listing looks like a lossy extraction. Every statement
; carries a leading number and those numbers skip (144, 147, 148, 150, 151,
; 153, 159, ...), so statements that belong between the visible ones are
; probably missing. In particular, nothing visible here
;   - copies the base row ([rsi] -> [rdi]),
;   - advances rsi or rdi between groups: every group below uses the same
;     address expressions, so as listed each group would re-copy the same rows,
;   - restores state or returns (no epilogue / ret); the function may also
;     continue past the end of this listing.
; Do not assemble or "fix" this fragment without the complete source.
; ---------------------------------------------------------------------------
129 ;void copy_mem16x16_sse2(
130 ; unsigned char *src,
132 ; unsigned char *dst,
135 global sym
(vp8_copy_mem16x16_sse2
)
136 sym
(vp8_copy_mem16x16_sse2
):
; SHADOW_ARGS_TO_STACK comes from the included x86_abi_support.asm; its exact
; effect is not visible here (presumably makes arg(n) addressable - confirm).
139 SHADOW_ARGS_TO_STACK
4
; Fetch the pointers and the source stride.
144 mov rsi
, arg
(0) ;src;
147 movsxd rax
, dword ptr arg
(1) ;src_stride;
148 mov rdi
, arg
(2) ;dst;
; ---- group 1: load rows src+stride and src+2*stride ----
150 movdqu xmm1
, [rsi
+rax
]
151 movdqu xmm2
, [rsi
+rax
*2]
; The destination stride is fetched only after the first loads have been issued.
153 movsxd rcx
, dword ptr arg
(3) ;dst_stride
; ---- group 1: store to dst+dst_stride and dst+2*dst_stride ----
159 movdqa
[rdi
+rcx
], xmm1
160 movdqa
[rdi
+rcx
*2],xmm2
; ---- group 2: same load/store pattern via xmm4 / xmm5 ----
166 movdqu xmm4
, [rsi
+rax
]
168 movdqu xmm5
, [rsi
+rax
*2]
174 movdqa
[rdi
+rcx
], xmm4
175 movdqa
[rdi
+rcx
*2],xmm5
; ---- group 3: same pattern via xmm1 / xmm2 ----
181 movdqu xmm1
, [rsi
+rax
]
183 movdqu xmm2
, [rsi
+rax
*2]
189 movdqa
[rdi
+rcx
], xmm1
191 movdqa
[rdi
+rcx
*2], xmm2
; ---- group 4: same pattern via xmm4 / xmm5 ----
194 movdqu xmm4
, [rsi
+rax
]
198 movdqu xmm5
, [rsi
+rax
*2]
204 movdqa
[rdi
+rcx
], xmm4
206 movdqa
[rdi
+rcx
*2],xmm5
; ---- group 5: same pattern via xmm1 / xmm2 ----
210 movdqu xmm1
, [rsi
+rax
]
213 movdqu xmm2
, [rsi
+rax
*2]
218 movdqa
[rdi
+rcx
], xmm1
219 movdqa
[rdi
+rcx
*2],xmm2
; ---- final visible row: a single 16-byte row via xmm3 ----
221 movdqu xmm3
, [rsi
+rax
]
224 movdqa
[rdi
+rcx
], xmm3