1 ;******************************************************************************
2 ;* MMX/SSE2-optimized functions for the VP6 decoder
3 ;* Copyright (C) 2009 Sebastien Lucas <sebastien.lucas@gmail.com>
4 ;* Copyright (C) 2009 Zuxy Meng <zuxy.meng@gmail.com>
6 ;* This file is part of Libav.
8 ;* Libav is free software; you can redistribute it and/or
9 ;* modify it under the terms of the GNU Lesser General Public
10 ;* License as published by the Free Software Foundation; either
11 ;* version 2.1 of the License, or (at your option) any later version.
13 ;* Libav is distributed in the hope that it will be useful,
14 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
15 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 ;* Lesser General Public License for more details.
18 ;* You should have received a copy of the GNU Lesser General Public
19 ;* License along with Libav; if not, write to the Free Software
20 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 ;******************************************************************************
23 %include "libavutil/x86/x86util.asm"
39 pmullw m0
, [rsp
+8*11] ; src[x-8 ] * biweight [0]
40 pmullw m1
, [rsp
+8*12] ; src[x ] * biweight [1]
41 pmullw m3
, [rsp
+8*11] ; src[x-8 ] * biweight [0]
42 pmullw m4
, [rsp
+8*12] ; src[x ] * biweight [1]
53 pmullw m1
, [rsp
+8*13] ; src[x+8 ] * biweight [2]
54 pmullw m2
, [rsp
+8*14] ; src[x+16] * biweight [3]
55 pmullw m4
, [rsp
+8*13] ; src[x+8 ] * biweight [2]
56 pmullw m5
, [rsp
+8*14] ; src[x+16] * biweight [3]
61 paddsw m0
, m6
; Add 64
62 paddsw m3
, m6
; Add 64
72 pmullw m0
, m4
; src[x-8 ] * biweight [0]
73 pmullw m1
, m5
; src[x ] * biweight [1]
79 pmullw m1
, m6
; src[x+8 ] * biweight [2]
80 pmullw m2
, m3
; src[x+16] * biweight [3]
83 paddsw m0
, [pw_64
] ; Add 64
87 %endif
; mmsize == 8/16
114 %endif
; mmsize == 8/16
117 %macro vp6_filter_diag4
0
118 ; void ff_vp6_filter_diag4_<opt>(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
119 ; const int16_t h_weights[4], const int16_t v_weights[4])
120 cglobal vp6_filter_diag4
, 5, 7, 8
121 mov r5
, rsp
; backup stack pointer
122 and rsp
, ~
(mmsize
-1) ; align stack
139 DIAG4 r1
, -1, 0, 1, 2, r3
151 DIAG4 r3
, -8, 0, 8, 16, r0
157 mov rsp
, r5
; restore stack pointer