2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
4 ; Use of this source code is governed by a BSD-style license and patent
5 ; grant that can be found in the LICENSE file in the root of the source
6 ; tree. All contributing project authors may be found in the AUTHORS
7 ; file in the root of the source tree.
11 %include "vpx_ports/x86_abi_support.asm"
14 ;int vp8_fast_quantize_b_impl_ssse3(short *coeff_ptr,
15 ; short *qcoeff_ptr, short *dequant_ptr,
17 ; short *round_ptr, short *quant_ptr, short *dqcoeff_ptr);
;-----------------------------------------------------------------------
; vp8_fast_quantize_b_impl_ssse3 -- exported entry point.
;
; Arguments, as read through arg(n) below:
;   arg(0) coeff_ptr    arg(1) qcoeff_ptr    arg(2) dequant_ptr
;   arg(3) round_ptr    arg(4) quant_ptr     arg(5) dqcoeff_ptr
;
; Every vector access below uses movdqa or an SSE memory operand, both of
; which fault on addresses that are not 16-byte aligned, so all six
; buffers must be 16-byte aligned.
;
; NOTE(review): xmm0, xmm1, xmm5, eax and edx are consumed below without
; a visible initialisation next to their use -- confirm where each one
; is set up before relying on the per-step notes.
;-----------------------------------------------------------------------
19 global sym
(vp8_fast_quantize_b_impl_ssse3
)
20 sym
(vp8_fast_quantize_b_impl_ssse3
):
; Six arguments are declared to the ABI helper macro.
; NOTE(review): presumably this makes all six reachable through arg(n)
; regardless of calling convention -- confirm in x86_abi_support.asm.
23 SHADOW_ARGS_TO_STACK
6
; First pointer set: rdx = coeff_ptr, rdi = round_ptr, rsi = quant_ptr.
29 mov rdx
, arg
(0) ;coeff_ptr
30 mov rdi
, arg
(3) ;round_ptr
31 mov rsi
, arg
(4) ;quant_ptr
; xmm4 = the 16 bytes at coeff_ptr + 16 (the second group of eight shorts).
34 movdqa xmm4
, [rdx
+ 16]
; xmm2 / xmm3 = the two 16-byte halves of the rounding table.
36 movdqa xmm2
, [rdi
] ;round lo
37 movdqa xmm3
, [rdi
+ 16] ;round hi
; An arithmetic right shift by 15 turns every 16-bit lane into 0x0000
; (non-negative) or 0xFFFF (negative), i.e. a per-lane sign mask.
42 psraw xmm0
, 15 ;sign of z (aka sz)
43 psraw xmm4
, 15 ;sign of z (aka sz)
; Keep the high 16 bits of each signed 16x16 product of xmm5 with the
; words at quant_ptr + 16.
52 pmulhw xmm5
, [rsi
+ 16]
; Second pointer set: rdi and rsi are reused, so round_ptr and quant_ptr
; are no longer held in registers after this point.
54 mov rdi
, arg
(1) ;qcoeff_ptr
55 mov rcx
, arg
(2) ;dequant_ptr
56 mov rsi
, arg
(5) ;dqcoeff_ptr
; Write xmm5 to qcoeff_ptr + 16.
64 movdqa
[rdi
+ 16], xmm5
; xmm3 is reloaded, now with the 16 bytes at dequant_ptr + 16.
67 movdqa xmm3
, [rcx
+ 16]
; Lane-wise equality test against xmm4: a lane becomes 0xFFFF where it
; equals the matching xmm4 lane and 0x0000 otherwise.
; NOTE(review): the "non zero mask" wording presumably relies on xmm4
; holding zero here and on the later bit flip -- confirm.
73 pcmpeqw xmm1
, xmm4
;non zero mask
74 pcmpeqw xmm5
, xmm4
;non zero mask
; Reorder the bytes of xmm1 using the zz_shuf control bytes.
76 pshufb xmm1
, [ GLOBAL(zz_shuf
)]
; 16-bit xor of dx with ax.
92 xor dx, ax ;flip the bits for bsr
; Write xmm2 and xmm3 to dqcoeff_ptr and dqcoeff_ptr + 16.
95 movdqa
[rsi
], xmm2
;store dqcoeff
96 movdqa
[rsi
+ 16], xmm3
;store dqcoeff
; edi is used as an integer scratch value from here on, not as
; qcoeff_ptr.
98 sub edi, edx ;check for all zeros in bit mask
; Mask eax with edi.
; NOTE(review): presumably eax is the return value, forced to 0 when the
; bit mask was empty -- confirm how edi is prepared before the sub.
101 and eax, edi ;if the bit mask was all zero,
; 16-byte table holding a permutation of the byte indices 0..15.
; NOTE(review): presumably this is the zz_shuf control operand used by
; the pshufb above (zig-zag scan order); its label is not adjacent here
; -- confirm. Used directly as an SSE memory operand it must sit on a
; 16-byte boundary.
114 db 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15