1 ;******************************************************************************
2 ;* x86 optimizations for PNG decoding
4 ;* Copyright (c) 2008 Loren Merritt <lorenm@u.washington.edu>
5 ;* Copyright (c) 2012 Ronald S. Bultje <rsbultje@gmail.com>
7 ;* This file is part of Libav.
9 ;* Libav is free software; you can redistribute it and/or
10 ;* modify it under the terms of the GNU Lesser General Public
11 ;* License as published by the Free Software Foundation; either
12 ;* version 2.1 of the License, or (at your option) any later version.
14 ;* Libav is distributed in the hope that it will be useful,
15 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
16 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 ;* Lesser General Public License for more details.
19 ;* You should have received a copy of the GNU Lesser General Public
20 ;* License along with Libav; if not, write to the Free Software
21 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 ;******************************************************************************
24 %include "libavutil/x86/x86util.asm"
32 ; %1 = nr. of xmm registers used
; add_bytes_l2 entry point. The cglobal arguments read as: 4 function
; arguments, 6 general-purpose registers, %1 vector registers, followed by
; the register names (dst, src1, src2, wa are the arguments; w and i name the
; two extra registers) -- per the x86inc cglobal convention; confirm there.
34 cglobal add_bytes_l2
, 4, 6, %1, dst
, src1
, src2
, wa
, w
, i
; Clear the low bits of wa so that it becomes a multiple of 2*mmsize
; (assumes mmsize is a power of two, as for the MMX/SSE register sizes).
42 and waq
, ~
(mmsize
*2-1)
; Load one vector register's worth of bytes from src1 at byte offset
; i + mmsize into m1.
46 mova m1
, [src1q
+iq
+mmsize
]
; Bytewise (wrapping) add of the src2 bytes at the same offset into m1.
48 paddb m1
, [src2q
+iq
+mmsize
]
; Store the summed bytes to dst at the same offset.
50 mova
[dstq
+iq
+mmsize
], m1
71 ; scalar loop for leftover
; Emits add_png_paeth_prediction. The macro takes a single parameter, %1,
; which is passed to cglobal in the vector-register-count position.
92 %macro ADD_PAETH_PRED_FN
1
; 5 function arguments (dst, src, top, w, bpp) and 7 general-purpose
; registers in total; end and cntr name the two extra registers
; (per the x86inc cglobal convention; confirm there).
93 cglobal add_png_paeth_prediction
, 5, 7, %1, dst
, src
, top
, w
, bpp
, end, cntr
; end = dst + w - (mmsize/2 - 1)
98 lea endq
, [dstq
+wq
-(mmsize
/2-1)]
; cntr >>= 2 + mmsize/16 (logical shift; the count is 2 for mmsize 8 and
; 3 for mmsize 16)
106 shr cntrq
, 2 + mmsize
/16
; dst += cntr * (mmsize/2)
108 lea dstq
, [dstq
+cntrq
*(mmsize
/2)]
128 %else
; !cpuflag(ssse3)
138 %endif
; cpuflag(ssse3)