; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
%include "vpx_ports/x86_abi_support.asm"

; pshufd selector 4eh = 01_00_11_10b: destination dwords 0..3 are taken from
; source dwords 2,3,0,1, i.e. the two 64-bit halves of the source are exchanged.
%define PSHUFD_SWAP_QWORDS 4eh

;-----------------------------------------------------------------------
; void vp8_short_inv_walsh4x4_sse2(short *input, short *output)
;
; Reads sixteen 16-bit words (32 bytes) from [rsi], runs two identical
; passes over them -- an add/subtract butterfly followed by a 4x4 word
; transpose -- and writes 32 bytes to [rdi].
;
; Lane comments are written "high qword | low qword".  In the transpose
; comments "rc" means row r, column c, listed from the high word to the
; low word of the register.
;
; Every memory access uses movdqa, which faults on an address that is
; not 16-byte aligned, so both buffers must be 16-byte aligned.
;
; sym() and SHADOW_ARGS_TO_STACK are not defined in this file;
; presumably they come from the include above -- confirm.
;
; NOTE(review): this listing looks incomplete.  Within the lines shown:
;   * rsi and rdi are used as the load/store bases but are never loaded
;     from the arguments (presumably rsi = input, rdi = output);
;   * xmm4, xmm5 and xmm7 are each read before any write to them;
;   * xmm7 is broadcast but never consumed afterwards;
;   * xmm7 is callee-saved under the Microsoft x64 ABI and no
;     save/restore of it appears;
;   * there is no epilogue and no ret.
; Confirm against the authoritative copy of this file before relying on
; this routine.
;-----------------------------------------------------------------------
global sym(vp8_short_inv_walsh4x4_sse2)
sym(vp8_short_inv_walsh4x4_sse2):
        SHADOW_ARGS_TO_STACK 2

        ; ---- first pass: butterfly ------------------------------------
        movdqa      xmm0, [rsi + 0]                 ; ip[4..7]   | ip[0..3]
        movdqa      xmm1, [rsi + 16]                ; ip[12..15] | ip[8..11]

        pshufd      xmm2, xmm1, PSHUFD_SWAP_QWORDS  ; ip[8..11]  | ip[12..15]
        movdqa      xmm3, xmm0                      ; second copy: xmm0 gets the sums, xmm3 the differences

        paddw       xmm0, xmm2                      ; b1 | a1 = ip[4]+ip[8] | ip[0]+ip[12]
        psubw       xmm3, xmm2                      ; c1 | d1 = ip[4]-ip[8] | ip[0]-ip[12]

        punpcklqdq  xmm0, xmm3                      ; d1 | a1
        ; NOTE(review): xmm4 has no earlier write in this view.  The
        ; "c1 | b1" result below only holds if xmm4 already held a copy of
        ; xmm0 taken before the punpcklqdq above (b1 | a1) -- confirm.
        punpckhqdq  xmm4, xmm3                      ; c1 | b1 (low half = previous high half of xmm4)

        movdqa      xmm1, xmm4                      ; c1 | b1, kept for the subtract
        paddw       xmm4, xmm0                      ; d1+c1 | a1+b1  -> rows 1 | 0 of the first-pass result
        psubw       xmm0, xmm1                      ; d1-c1 | a1-b1  -> rows 3 | 2 of the first-pass result

        ; Disabled temporary output, kept from the original source:
        ;; movdqu [rdi + 0], xmm4
        ;; movdqu [rdi + 16], xmm3

        ; ---- first pass: 4x4 word transpose ---------------------------
        ; in:  xmm4 = 13 12 11 10 03 02 01 00
        ;      xmm0 = 33 32 31 30 23 22 21 20
        movdqa      xmm3, xmm4                      ; 13 12 11 10 03 02 01 00
        punpcklwd   xmm4, xmm0                      ; 23 03 22 02 21 01 20 00
        punpckhwd   xmm3, xmm0                      ; 33 13 32 12 31 11 30 10
        movdqa      xmm1, xmm4                      ; 23 03 22 02 21 01 20 00
        punpcklwd   xmm4, xmm3                      ; 31 21 11 01 30 20 10 00  (columns 1 | 0)
        punpckhwd   xmm1, xmm3                      ; 33 23 13 03 32 22 12 02  (columns 3 | 2)

        ; ---- second pass: butterfly -----------------------------------
        ; Same arithmetic as the first pass, now with xmm4 / xmm1 as the
        ; low / high 16 bytes of the block.
        pshufd      xmm2, xmm1, PSHUFD_SWAP_QWORDS  ; columns 2 | 3
        movdqa      xmm3, xmm4                      ; columns 1 | 0

        ; Copies the low dword of xmm7 into all four dwords.  The original
        ; comment gives the result as eight words of 03, which requires the
        ; low dword to be 00030003h on entry to this instruction.
        ; NOTE(review): no write to xmm7 and no later use of it is visible.
        pshufd      xmm7, xmm7, 0                   ; 03 03 03 03 03 03 03 03

        paddw       xmm4, xmm2                      ; b1 | a1 = col1+col2 | col0+col3
        psubw       xmm3, xmm2                      ; c1 | d1 = col1-col2 | col0-col3

        punpcklqdq  xmm4, xmm3                      ; d1 | a1
        ; NOTE(review): same situation as xmm4 in the first pass -- xmm5 has
        ; no earlier write in this view; "c1 | b1" needs it to hold a copy of
        ; xmm4 taken before the punpcklqdq above -- confirm.
        punpckhqdq  xmm5, xmm3                      ; c1 | b1 (low half = previous high half of xmm5)

        movdqa      xmm1, xmm5                      ; c1 | b1, kept for the subtract
        paddw       xmm5, xmm4                      ; d1+c1 | a1+b1
        psubw       xmm4, xmm1                      ; d1-c1 | a1-b1

        ; ---- second pass: 4x4 word transpose --------------------------
        ; in:  xmm5 = 13 12 11 10 03 02 01 00
        ;      xmm4 = 33 32 31 30 23 22 21 20
        movdqa      xmm0, xmm5                      ; 13 12 11 10 03 02 01 00
        punpcklwd   xmm5, xmm4                      ; 23 03 22 02 21 01 20 00
        punpckhwd   xmm0, xmm4                      ; 33 13 32 12 31 11 30 10
        movdqa      xmm1, xmm5                      ; 23 03 22 02 21 01 20 00
        punpcklwd   xmm5, xmm0                      ; 31 21 11 01 30 20 10 00
        punpckhwd   xmm1, xmm0                      ; 33 23 13 03 32 22 12 02

        ; ---- store the 32-byte result ---------------------------------
        movdqa      [rdi + 0], xmm5
        movdqa      [rdi + 16], xmm1

        ; NOTE(review): no epilogue or ret follows in this view; confirm
        ; the function is properly terminated in the full file.