;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;

%include "vpx_ports/x86_abi_support.asm"
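
; x86_abi_support.asm supplies the sym(), arg(n) and SHADOW_ARGS_TO_STACK
; macros used below, abstracting over the Win64 and SysV x86-64 calling
; conventions.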

;void vp8_recon_b_mmx(unsigned char *s, short *q, unsigned char *d, int stride)
global sym(vp8_recon_b_mmx)
sym(vp8_recon_b_mmx):
    SHADOW_ARGS_TO_STACK 4

    movsxd      rax, dword ptr arg(3)       ;stride
    packuswb    mm1, mm0                    ; pack and unpack to saturate
    packuswb    mm2, mm0                    ; pack and unpack to saturate
    packuswb    mm3, mm0                    ; pack and unpack to saturate
    packuswb    mm4, mm0                    ; pack and unpack to saturate
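
; For reference, a C sketch of the reconstruction this routine performs:
; add the dequantized residual q to the predictor s and saturate each
; result to 0..255 (the packuswb instructions above provide the
; saturation). The pitch of 16 for s and q is an assumption based on
; VP8's macroblock buffers, not something stated in this file.
;
;    void recon_b(unsigned char *s, short *q, unsigned char *d, int stride)
;    {
;        int r, c;
;        for (r = 0; r < 4; r++) {
;            for (c = 0; c < 4; c++) {
;                int a = s[c] + q[c];
;                d[c] = a < 0 ? 0 : (a > 255 ? 255 : a);  /* saturate */
;            }
;            s += 16;        /* assumed predictor pitch */
;            q += 16;        /* assumed residual pitch */
;            d += stride;
;        }
;    }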

;void copy_mem8x8_mmx(
;    unsigned char *src,
;    int src_stride,
;    unsigned char *dst,
;    int dst_stride)
global sym(vp8_copy_mem8x8_mmx)
sym(vp8_copy_mem8x8_mmx):
    SHADOW_ARGS_TO_STACK 4
    mov         rsi, arg(0)                 ;src;
    movsxd      rax, dword ptr arg(1)       ;src_stride;
    mov         rdi, arg(2)                 ;dst;
    movsxd      rcx, dword ptr arg(3)       ;dst_stride
    movq        mm5, [rsi+rax*2]
    movq        [rdi+rcx*2], mm5
    movq        mm1, [rsi+rax*2]
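
; In C terms this function is a plain 8x8 byte copy; each movq moves one
; 8-byte row. Sketch (prototype from the comment above):
;
;    void copy_mem8x8(unsigned char *src, int src_stride,
;                     unsigned char *dst, int dst_stride)
;    {
;        int r;
;        for (r = 0; r < 8; r++) {
;            memcpy(dst, src, 8);    /* one movq load + store per row */
;            src += src_stride;
;            dst += dst_stride;
;        }
;    }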

;void copy_mem8x4_mmx(
;    unsigned char *src,
;    int src_stride,
;    unsigned char *dst,
;    int dst_stride)
global sym(vp8_copy_mem8x4_mmx)
sym(vp8_copy_mem8x4_mmx):
    SHADOW_ARGS_TO_STACK 4
    mov         rsi, arg(0)                 ;src;
    movsxd      rax, dword ptr arg(1)       ;src_stride;

    mov         rdi, arg(2)                 ;dst;
    movq        mm2, [rsi+rax*2]

    movsxd      rcx, dword ptr arg(3)       ;dst_stride

    movq        [rdi+rcx*2], mm2
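
; Same idea as copy_mem8x8 above, truncated to four rows: an 8x4 byte
; copy with one movq per row.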

;void copy_mem16x16_mmx(
;    unsigned char *src,
;    int src_stride,
;    unsigned char *dst,
;    int dst_stride)
global sym(vp8_copy_mem16x16_mmx)
sym(vp8_copy_mem16x16_mmx):
    SHADOW_ARGS_TO_STACK 4
    mov         rsi, arg(0)                 ;src;
    movsxd      rax, dword ptr arg(1)       ;src_stride;
    mov         rdi, arg(2)                 ;dst;
    movsxd      rcx, dword ptr arg(3)       ;dst_stride
    movq        mm4, [rsi+rax+8]
    movq        mm2, [rsi+rax*2]
    movq        mm5, [rsi+rax*2+8]

    movq        [rdi+rcx+8], mm4
    movq        [rdi+rcx*2], mm2
    movq        [rdi+rcx*2+8], mm5

    movq        mm4, [rsi+rax+8]
    movq        mm2, [rsi+rax*2]
    movq        mm5, [rsi+rax*2+8]

    movq        [rdi+rcx+8], mm4
    movq        [rdi+rcx*2], mm2
    movq        [rdi+rcx*2+8], mm5

    movq        mm4, [rsi+rax+8]
    movq        mm2, [rsi+rax*2]
    movq        mm5, [rsi+rax*2+8]

    movq        [rdi+rcx+8], mm4
    movq        [rdi+rcx*2], mm2
    movq        [rdi+rcx*2+8], mm5

    movq        mm4, [rsi+rax+8]
    movq        mm2, [rsi+rax*2]
    movq        mm5, [rsi+rax*2+8]

    movq        [rdi+rcx+8], mm4
    movq        [rdi+rcx*2], mm2
    movq        [rdi+rcx*2+8], mm5

    movq        mm4, [rsi+rax+8]
    movq        mm2, [rsi+rax*2]
    movq        mm5, [rsi+rax*2+8]

    movq        [rdi+rcx+8], mm4
    movq        [rdi+rcx*2], mm2
    movq        [rdi+rcx*2+8], mm5
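
; C equivalent: a 16x16 byte copy, two movq transfers (offset 0 and +8)
; per row, with the row loop unrolled above. Sketch:
;
;    void copy_mem16x16(unsigned char *src, int src_stride,
;                       unsigned char *dst, int dst_stride)
;    {
;        int r;
;        for (r = 0; r < 16; r++) {
;            memcpy(dst, src, 16);   /* two movq pairs per row */
;            src += src_stride;
;            dst += dst_stride;
;        }
;    }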