swscale/aarch64/output.S: refactor ff_yuv2plane1_8_neon
[FFMpeg-mirror.git] / libavfilter / x86 / vf_limiter.asm
blobe1b80eba61ce71d6bcf8821e89523546574bafa1
1 ;*****************************************************************************
2 ;* x86-optimized functions for limiter filter
3 ;*
4 ;* This file is part of FFmpeg.
5 ;*
6 ;* FFmpeg is free software; you can redistribute it and/or
7 ;* modify it under the terms of the GNU Lesser General Public
8 ;* License as published by the Free Software Foundation; either
9 ;* version 2.1 of the License, or (at your option) any later version.
11 ;* FFmpeg is distributed in the hope that it will be useful,
12 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
13 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 ;* Lesser General Public License for more details.
16 ;* You should have received a copy of the GNU Lesser General Public
17 ;* License along with FFmpeg; if not, write to the Free Software
18 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 ;******************************************************************************
21 %include "libavutil/x86/x86util.asm"
23 SECTION .text
INIT_XMM sse2

;------------------------------------------------------------------------------
; limiter_8bit(src, dst, slinesize, dlinesize, w, h, <arg 6>, <arg 7>)
;
; Copies an h-row plane of bytes from src to dst, clamping every byte with
; CLIPUB against two bounds taken from the low byte of stack arguments 6 and 7
; (presumably lower bound first, then upper bound -- confirm against CLIPUB).
;
;   src, dst              row base pointers
;   slinesize, dlinesize  byte distance between consecutive rows
;   w                     bytes per row (32-bit value, sign-extended here)
;   h                     number of rows
;
; Each row is handled in mmsize-byte chunks with unaligned loads and stores.
; The inner loop always runs at least once and only stops at a chunk boundary,
; so when w is not a multiple of mmsize it reads and writes past w.
; NOTE(review): callers presumably guarantee enough row padding -- confirm.
;------------------------------------------------------------------------------
cglobal limiter_8bit, 6, 7, 3, src, dst, slinesize, dlinesize, w, h, pos
    ; Broadcast the low byte of each bound to every byte lane:
    ; byte -> duplicated into a word (punpcklbw) -> word splat (SPLATW).
    movd            m1, r6m
    movd            m2, r7m
    punpcklbw       m1, m1
    punpcklbw       m2, m2
    SPLATW          m1, m1
    SPLATW          m2, m2

    ; Point src/dst at the end of the first row and walk a negative offset
    ; up towards zero, so the loop counter doubles as the address index.
    movsxdifnidn    wq, wd
    add             dstq, wq
    add             srcq, wq
    neg             wq

.row:
    mov             posq, wq                ; pos = -w, restart at row begin

.column:
    movu            m0, [srcq + posq]
    CLIPUB          m0, m1, m2
    movu            [dstq + posq], m0
    add             posq, mmsize
    jl              .column                 ; continue while pos < 0

    add             dstq, dlinesizeq
    add             srcq, slinesizeq
    sub             hd, 1
    jg              .row                    ; continue while rows remain
    RET
INIT_XMM sse4

;------------------------------------------------------------------------------
; limiter_16bit(src, dst, slinesize, dlinesize, w, h, <arg 6>, <arg 7>)
;
; Copies an h-row plane of 16-bit words from src to dst, clamping every word
; as an unsigned value: raised to at least the low word of stack argument 6
; (pmaxuw) and lowered to at most the low word of stack argument 7 (pminuw).
;
;   src, dst              row base pointers
;   slinesize, dlinesize  byte distance between consecutive rows
;   w                     row width; doubled below to obtain a byte count, so
;                         it presumably counts 16-bit samples -- confirm
;   h                     number of rows
;
; Each row is handled in mmsize-byte chunks with unaligned loads and stores.
; The inner loop always runs at least once and only stops at a chunk boundary,
; so rows whose byte width is not a multiple of mmsize are over-read and
; over-written.
; NOTE(review): callers presumably guarantee enough row padding -- confirm.
;------------------------------------------------------------------------------
cglobal limiter_16bit, 6, 7, 3, src, dst, slinesize, dlinesize, w, h, pos
    ; Broadcast the low word of each bound to every word lane.
    movd            m1, r6m
    movd            m2, r7m
    SPLATW          m1, m1
    SPLATW          m2, m2

    ; Width in bytes. The shift is done on the 32-bit register, which on
    ; x86-64 also clears the upper half of wq before it is used as an offset.
    shl             wd, 1

    ; Point src/dst at the end of the first row and walk a negative offset
    ; up towards zero, so the loop counter doubles as the address index.
    add             dstq, wq
    add             srcq, wq
    neg             wq

.row:
    mov             posq, wq                ; pos = -(row bytes)

.column:
    movu            m0, [srcq + posq]
    pmaxuw          m0, m1                  ; enforce lower bound
    pminuw          m0, m2                  ; enforce upper bound
    movu            [dstq + posq], m0
    add             posq, mmsize
    jl              .column                 ; continue while pos < 0

    add             dstq, dlinesizeq
    add             srcq, slinesizeq
    sub             hd, 1
    jg              .row                    ; continue while rows remain
    RET