Removed unused vp8_recon_intra4x4mb function
[libvpx.git] / vp8 / encoder / x86 / quantize_ssse3.asm
blob2f33199e54bfeeb0b5c47e24e746ef3178c844b5
2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
4 ; Use of this source code is governed by a BSD-style license and patent
5 ; grant that can be found in the LICENSE file in the root of the source
6 ; tree. All contributing project authors may be found in the AUTHORS
7 ; file in the root of the source tree.
11 %include "vpx_ports/x86_abi_support.asm"
;-----------------------------------------------------------------------------
; int vp8_fast_quantize_b_impl_ssse3(short *coeff_ptr,
;                                    short *qcoeff_ptr, short *dequant_ptr,
;                                    short *round_ptr,
;                                    short *quant_ptr, short *dqcoeff_ptr);
;
; Quantizes 16 signed 16-bit coefficients (two 16-byte vectors) and returns
; the end-of-block position.  Per lane:
;
;     sz      = z >> 15                        (0 or -1)
;     y       = ((abs(z) + round) * quant) >> 16
;     qcoeff  = (y ^ sz) - sz                  (sign of z restored)
;     dqcoeff = qcoeff * dequant               (low 16 bits)
;
; Returns (eax): 1 + index of the last non-zero qcoeff, counted in the order
;                given by the zz_shuf table, or 0 when every qcoeff is zero.
;
; All six pointers are accessed with movdqa (directly or as a memory
; operand), so each must be 16-byte aligned and cover 32 bytes.
;
; arg(n), sym(), GLOBAL() and the prolog/epilog macros come from
; vpx_ports/x86_abi_support.asm.
; Scratch registers used: rax, rcx, rdx, xmm0-xmm5 (rsi/rdi are saved here).
;-----------------------------------------------------------------------------
global sym(vp8_fast_quantize_b_impl_ssse3)
sym(vp8_fast_quantize_b_impl_ssse3):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 6
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

    mov         rdx, arg(0)                 ;coeff_ptr
    mov         rdi, arg(3)                 ;round_ptr
    mov         rsi, arg(4)                 ;quant_ptr

    movdqa      xmm0, [rdx]                 ;z, coefficients 0..7
    movdqa      xmm4, [rdx + 16]            ;z, coefficients 8..15

    movdqa      xmm2, [rdi]                 ;round lo
    movdqa      xmm3, [rdi + 16]            ;round hi

    movdqa      xmm1, xmm0                  ;keep a copy of z for abs()
    movdqa      xmm5, xmm4

    psraw       xmm0, 15                    ;sign of z (aka sz): 0 or -1
    psraw       xmm4, 15                    ;sign of z (aka sz): 0 or -1

    pabsw       xmm1, xmm1                  ;x = abs(z)
    pabsw       xmm5, xmm5

    paddw       xmm1, xmm2                  ;x += round
    paddw       xmm5, xmm3

    pmulhw      xmm1, [rsi]                 ;y = (x * quant) >> 16
    pmulhw      xmm5, [rsi + 16]

    ; rdi/rsi are free again: reuse them for the output pointers.
    mov         rdi, arg(1)                 ;qcoeff_ptr
    mov         rcx, arg(2)                 ;dequant_ptr
    mov         rsi, arg(5)                 ;dqcoeff_ptr

    pxor        xmm1, xmm0                  ;(y ^ sz) - sz puts the sign
    pxor        xmm5, xmm4                  ;of z back on y
    psubw       xmm1, xmm0
    psubw       xmm5, xmm4

    movdqa      [rdi], xmm1                 ;store qcoeff
    movdqa      [rdi + 16], xmm5

    movdqa      xmm2, [rcx]                 ;dequant lo
    movdqa      xmm3, [rcx + 16]            ;dequant hi

    pxor        xmm4, xmm4                  ;xmm4 = 0 for the compares below
    pmullw      xmm2, xmm1                  ;dqcoeff = qcoeff * dequant
    pmullw      xmm3, xmm5

    pcmpeqw     xmm1, xmm4                  ;0xFFFF where qcoeff == 0
    pcmpeqw     xmm5, xmm4                  ;0xFFFF where qcoeff == 0
    packsswb    xmm1, xmm5                  ;16 word masks -> 16 byte masks
    pshufb      xmm1, [ GLOBAL(zz_shuf)]    ;reorder the masks per zz_shuf

    pmovmskb    edx, xmm1                   ;bit i set <=> i-th entry is zero

    ; Scalar equivalent of the bsr sequence below, kept for reference:
;   xor         ecx, ecx
;   mov         eax, -1
;find_eob_loop:
;   shr         edx, 1
;   jc          fq_skip
;   mov         eax, ecx
;fq_skip:
;   inc         ecx
;   cmp         ecx, 16
;   jne         find_eob_loop
    xor         rdi, rdi                    ;edi = 0, minuend for the sub below
    mov         eax, -1                     ;ax = 0xFFFF
    xor         dx, ax                      ;flip the bits for bsr:
                                            ;bit i set <=> entry is non-zero
    bsr         eax, edx                    ;index of the highest set bit
                                            ;(result unspecified if edx == 0)

    movdqa      [rsi], xmm2                 ;store dqcoeff
    movdqa      [rsi + 16], xmm3            ;store dqcoeff

    sub         edi, edx                    ;check for all zeros in bit mask
    sar         edi, 31                     ;0 or -1
    add         eax, 1                      ;eob = last non-zero index + 1
    and         eax, edi                    ;if the bit mask was all zero,
                                            ;then eob = 0
    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_GOT
    UNSHADOW_ARGS
    pop         rbp
    ret                                     ;eax = eob
SECTION_RODATA
align 16
; pshufb control vector used by vp8_fast_quantize_b_impl_ssse3: output byte i
; is taken from source byte zz_shuf[i].  Must stay 16-byte aligned because it
; is used directly as an SSE memory operand.
zz_shuf:
    db  0,  1,  4,  8
    db  5,  2,  3,  6
    db  9, 12, 13, 10
    db  7, 11, 14, 15