avformat/mxfdec: Check edit unit for overflow in mxf_set_current_edit_unit()
[FFMpeg-mirror.git] / libavfilter / x86 / vf_interlace.asm
blobc28f9fbe3e54b8192720cef260b39d753d4c84ee
1 ;*****************************************************************************
2 ;* x86-optimized functions for interlace filter
3 ;*
4 ;* Copyright (C) 2014 Kieran Kunhya <kierank@obe.tv>
5 ;* Copyright (c) 2014 Michael Niedermayer <michaelni@gmx.at>
6 ;* Copyright (c) 2017 Thomas Mundt <tmundt75@gmail.com>
7 ;*
8 ;* This file is part of FFmpeg.
9 ;*
10 ;* FFmpeg is free software; you can redistribute it and/or modify
11 ;* it under the terms of the GNU General Public License as published by
12 ;* the Free Software Foundation; either version 2 of the License, or
13 ;* (at your option) any later version.
15 ;* FFmpeg is distributed in the hope that it will be useful,
16 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ;* GNU General Public License for more details.
20 ;* You should have received a copy of the GNU General Public License along
21 ;* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
22 ;* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
23 ;******************************************************************************
25 %include "libavutil/x86/x86util.asm"
SECTION_RODATA

; Eight 16-bit words, each holding 4: the rounding bias the complex low-pass
; filters add before their final ">> 3".
pw_4:   dw 4, 4, 4, 4, 4, 4, 4, 4

SECTION .text
;------------------------------------------------------------------------------
; LOWPASS <b|w>
;
; Body of the simple low-pass line filter. %1 selects the element size used by
; pcmpeq/pavg (b = bytes, w = words). Must be expanded inside a cglobal
; function that names its registers dst, h, src, mref and pref.
;
; On entry:
;   hq            row length in bytes
;   mrefq, prefq  byte offsets, relative to srcq, of the two other input rows
;                 (presumably the lines above and below - confirm with caller)
;
; Per element:
;   t   = pavg(row_mref, row_pref)      ; pavg rounds up
;   dst = ~pavg(~t, ~src)               ; complementing makes this one round down
;
; Uses m0-m3 and m6, and finishes the enclosing function with RET.
;------------------------------------------------------------------------------
%macro LOWPASS 1
    pcmpeq%1    m6, m6                      ; all bits set: XOR with m6 acts as NOT

    ; Move every row to its end so one negative index walks all four of them.
    add         srcq, hq
    add         dstq, hq
    add         prefq, srcq                 ; offset -> pointer to end of that row
    add         mrefq, srcq
    neg         hq                          ; hq = -length, counts up towards 0

    ; The main loop handles two vectors per pass. If the mmsize bit of the
    ; negated length is set (an odd number of vectors when the length is a
    ; multiple of mmsize), peel off a single vector first.
    test        hq, mmsize
    jz          .pair_loop

    ; single vector
    pxor        m2, m6, [srcq+hq]           ; m2 = ~src
    movu        m0, [mrefq+hq]
    pavg%1      m0, [prefq+hq]              ; average of the two outer rows
    pxor        m0, m6
    pavg%1      m0, m2
    pxor        m0, m6
    movu        [dstq+hq], m0
    add         hq, mmsize
    jge         .done                       ; index reached 0: nothing left

.pair_loop:
    pxor        m2, m6, [srcq+hq]           ; m2/m3 = ~src for both vectors
    pxor        m3, m6, [srcq+hq+mmsize]
    movu        m0, [mrefq+hq]
    movu        m1, [mrefq+hq+mmsize]
    pavg%1      m0, [prefq+hq]
    pavg%1      m1, [prefq+hq+mmsize]
    pxor        m0, m6
    pxor        m1, m6
    pavg%1      m0, m2
    pavg%1      m1, m3
    pxor        m0, m6
    pxor        m1, m6
    movu        [dstq+hq], m0
    movu        [dstq+hq+mmsize], m1

    add         hq, 2*mmsize
    jl          .pair_loop                  ; keep going while the index is negative

.done:
    RET
%endmacro
;------------------------------------------------------------------------------
; LOWPASS_LINE
;
; Emits the two simple low-pass entry points for the instruction set chosen by
; the preceding INIT_* directive:
;   lowpass_line      - LOWPASS on byte elements
;   lowpass_line_16   - LOWPASS on word elements
; Both take five arguments (dst, h, src, mref, pref) and declare 7 vector
; registers.
;------------------------------------------------------------------------------
%macro LOWPASS_LINE 0
cglobal lowpass_line, 5, 5, 7, dst, h, src, mref, pref
    LOWPASS b

cglobal lowpass_line_16, 5, 5, 7, dst, h, src, mref, pref
    ; LOWPASS uses h as a byte length; double it here (h is presumably a
    ; count of two-byte samples for this variant).
    add         hq, hq
    LOWPASS w
%endmacro
;------------------------------------------------------------------------------
; LOWPASS_LINE_COMPLEX
;
; Emits lowpass_line_complex (byte samples) and lowpass_line_complex_12
; (samples stored in 16-bit words, result limited to clip_max) for the
; instruction set chosen by the preceding INIT_* directive.
;
; Arguments (both functions; clip_max only for the _12 variant):
;   dst         output row
;   h           number of samples to produce; the loop always works in whole
;               vector steps, so the final step may run past h
;   src         current input row                               -> c
;   mref, pref  byte offsets from src; the rows at src+mref and src+pref
;               (-> a, b) and at src+2*mref and src+2*pref (-> d, e) are read
;   clip_max    upper limit for the result (6th argument, _12 only)
;
; Per sample:
;   v   = (6*c + 2*(a + b) + 4  -sat  (d + e)) >> 3   ; -sat clamps at 0
;   v   = min(v, 255)  resp.  min(v, clip_max)
;   dst = (a + b > 2*c) ? max(v, c) : min(v, c)
;------------------------------------------------------------------------------
%macro LOWPASS_LINE_COMPLEX 0
cglobal lowpass_line_complex, 5, 5, 8, dst, h, src, mref, pref
    pxor        m7, m7                      ; zero, used to widen bytes to words
.loop:
    movu        m0, [srcq+mrefq]            ; a
    movu        m2, [srcq+prefq]            ; b
    mova        m1, m0
    mova        m3, m2
    punpcklbw   m0, m7
    punpcklbw   m2, m7
    punpckhbw   m1, m7
    punpckhbw   m3, m7
    paddw       m0, m2                      ; m0/m1 = a + b (low/high half)
    paddw       m1, m3
    mova        m6, m0                      ; keep a + b for the compare below
    mova        m5, m1
    movu        m2, [srcq]                  ; c
    mova        m3, m2
    punpcklbw   m2, m7
    punpckhbw   m3, m7
    paddw       m0, m2                      ; a + b + c
    paddw       m1, m3
    psllw       m2, 1                       ; 2c
    psllw       m3, 1
    paddw       m0, m2                      ; a + b + 3c
    paddw       m1, m3
    psllw       m0, 1                       ; 2(a + b) + 6c
    psllw       m1, 1
    pcmpgtw     m6, m2                      ; word mask: a + b > 2c
    pcmpgtw     m5, m3
    packsswb    m6, m5                      ; word masks -> one byte mask
    movu        m2, [srcq+mrefq*2]          ; d
    movu        m4, [srcq+prefq*2]          ; e
    mova        m3, m2
    mova        m5, m4
    punpcklbw   m2, m7
    punpcklbw   m4, m7
    punpckhbw   m3, m7
    punpckhbw   m5, m7
    paddw       m2, m4                      ; d + e
    paddw       m3, m5
    paddw       m0, [pw_4]                  ; rounding bias for the >> 3
    paddw       m1, [pw_4]
    psubusw     m0, m2                      ; subtract d + e, clamping at 0
    psubusw     m1, m3
    psrlw       m0, 3
    psrlw       m1, 3
    packuswb    m0, m1                      ; back to bytes, saturating at 255
    mova        m1, m0
    movu        m2, [srcq]                  ; c again, still as bytes
    pmaxub      m0, m2                      ; max(v, c)
    pminub      m1, m2                      ; min(v, c)
    pand        m0, m6                      ; max where a + b > 2c ...
    pandn       m6, m1                      ; ... min everywhere else
    por         m0, m6
    movu        [dstq], m0

    add         dstq, mmsize
    add         srcq, mmsize
    sub         hd, mmsize                  ; mmsize byte samples done per pass
    jg          .loop
    RET                                     ; epilogue + return; must not fall
                                            ; through into the next function

cglobal lowpass_line_complex_12, 5, 5, 8, 16, dst, h, src, mref, pref, clip_max
    ; Broadcast clip_max to every word and park it in the 16 bytes of stack
    ; requested above: m7 is needed as a mask register inside the loop.
    movd        m7, DWORD clip_maxm
    SPLATW      m7, m7, 0
    movu        [rsp], m7
.loop:
    movu        m0, [srcq+mrefq]            ; a (two vectors of words)
    movu        m1, [srcq+mrefq+mmsize]
    movu        m2, [srcq+prefq]            ; b
    movu        m3, [srcq+prefq+mmsize]
    paddw       m0, m2                      ; a + b
    paddw       m1, m3
    mova        m6, m0                      ; keep a + b for the compare below
    mova        m7, m1
    movu        m2, [srcq]                  ; c
    movu        m3, [srcq+mmsize]
    paddw       m0, m2                      ; a + b + c
    paddw       m1, m3
    psllw       m2, 1                       ; 2c
    psllw       m3, 1
    paddw       m0, m2                      ; a + b + 3c
    paddw       m1, m3
    psllw       m0, 1                       ; 2(a + b) + 6c
    psllw       m1, 1
    pcmpgtw     m6, m2                      ; mask: a + b > 2c
    pcmpgtw     m7, m3
    movu        m2, [srcq+2*mrefq]          ; d
    movu        m3, [srcq+2*mrefq+mmsize]
    movu        m4, [srcq+2*prefq]          ; e
    movu        m5, [srcq+2*prefq+mmsize]
    paddw       m2, m4                      ; d + e
    paddw       m3, m5
    paddw       m0, [pw_4]                  ; rounding bias for the >> 3
    paddw       m1, [pw_4]
    psubusw     m0, m2                      ; subtract d + e, clamping at 0
    psubusw     m1, m3
    psrlw       m0, 3
    psrlw       m1, 3
    pminsw      m0, [rsp]                   ; limit to clip_max
    pminsw      m1, [rsp]
    mova        m2, m0
    mova        m3, m1
    movu        m4, [srcq]                  ; c, first vector
    pmaxsw      m0, m4                      ; max(v, c)
    pminsw      m2, m4                      ; min(v, c)
    movu        m4, [srcq + mmsize]         ; c, second vector
    pmaxsw      m1, m4
    pminsw      m3, m4
    pand        m0, m6                      ; max where a + b > 2c ...
    pand        m1, m7
    pandn       m6, m2                      ; ... min everywhere else
    pandn       m7, m3
    por         m0, m6
    por         m1, m7
    movu        [dstq], m0
    movu        [dstq+mmsize], m1

    add         dstq, 2*mmsize
    add         srcq, 2*mmsize
    sub         hd, mmsize                  ; 2*mmsize bytes = mmsize word samples
    jg          .loop
    RET                                     ; epilogue (releases the stack
                                            ; reservation) + return
%endmacro
; lowpass_line / lowpass_line_16: SSE2 and AVX builds on xmm registers, plus
; an AVX2 build on ymm registers when HAVE_AVX2_EXTERNAL (defined outside this
; file) is non-zero.
INIT_XMM sse2
LOWPASS_LINE

INIT_XMM avx
LOWPASS_LINE

%if HAVE_AVX2_EXTERNAL
INIT_YMM avx2
LOWPASS_LINE
%endif

; lowpass_line_complex / lowpass_line_complex_12: built for SSE2 only.
INIT_XMM sse2
LOWPASS_LINE_COMPLEX