avcodec/jpegxl_parse{,r}: fix integer overflow for some malformed files
[FFMpeg-mirror.git] / libavfilter / x86 / vf_blend.asm
blob362020ec95913c92ce6bf85c9093774357764258
1 ;*****************************************************************************
2 ;* x86-optimized functions for blend filter
3 ;*
4 ;* Copyright (C) 2015 Paul B Mahol
5 ;* Copyright (C) 2018 Henrik Gramner
6 ;* Copyright (C) 2018 Jokyo Images
7 ;*
8 ;* This file is part of FFmpeg.
9 ;*
10 ;* FFmpeg is free software; you can redistribute it and/or
11 ;* modify it under the terms of the GNU Lesser General Public
12 ;* License as published by the Free Software Foundation; either
13 ;* version 2.1 of the License, or (at your option) any later version.
15 ;* FFmpeg is distributed in the hope that it will be useful,
16 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 ;* Lesser General Public License for more details.
20 ;* You should have received a copy of the GNU Lesser General Public
21 ;* License along with FFmpeg; if not, write to the Free Software
22 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 ;******************************************************************************
25 %include "libavutil/x86/x86util.asm"
SECTION_RODATA

; Each constant below fills 16 bytes.

; 255.0 in four float lanes: scale factor and upper clamp in DIVIDE.
ps_255:   times 4  dd 255.0
; Dword bias loaded by the 16-bit GRAINEXTRACT / GRAINMERGE variants.
pd_32768: times 4  dd 32768
; Dword constant loaded by the 16-bit EXTREMITY / NEGATION variants.
pd_65535: times 4  dd 65535
; Word 1: rounding term handed to MULTIPLY / SCREEN.
pw_1:     times 8  dw 1
; Word bias loaded by the 8-bit GRAINEXTRACT / GRAINMERGE variants.
pw_128:   times 8  dw 128
; Word 255: used by BLEND_SCREEN and the 8-bit EXTREMITY / NEGATION variants.
pw_255:   times 8  dw 255
; Byte masks used by HARDMIX (pb_255 is also loaded by PHOENIX).
pb_127:   times 16 db 127
pb_128:   times 16 db 128
pb_255:   times 16 db 255

SECTION .text
;------------------------------------------------------------------------------
; BLEND_INIT name, vector_regs [, is_16bit = 0]
;
; Opens the function blend_<name> through cglobal and sets up the state that
; every kernel in this file relies on:
;   * top, bottom and dst are each advanced by width, then width is negated,
;     so the kernels can run x from -width up to 0 using "add x / jl".
;   * end receives the value of argument slot r7m; BLEND_END decrements it
;     once per row.
;   * when is_16bit is non-zero (x86_64 path only) width is doubled first,
;     presumably turning a sample count into a byte count.
; On x86_32 dst_linesize and width are not given registers; they are accessed
; directly in their argument slots (r5mp / r6mp).
;------------------------------------------------------------------------------
%macro BLEND_INIT 2-3 0
%if ARCH_X86_64
cglobal blend_%1, 6, 9, %2, top, top_linesize, bottom, bottom_linesize, dst, dst_linesize, width, end, x
    mov         widthd, dword widthm
  %if %3 ; 16-bit variant
    add         widthq, widthq          ; doesn't compile on x86_32
  %endif
%else
cglobal blend_%1, 5, 7, %2, top, top_linesize, bottom, bottom_linesize, dst, end, x
    %define dst_linesizeq r5mp
    %define widthq        r6mp
%endif
    ; Point all three buffers just past the end of the first row.
    add           dstq, widthq
    add        bottomq, widthq
    add           topq, widthq
    mov           endd, dword r7m       ; rows left to process
    neg         widthq                  ; x starts at -width in every row
%endmacro
;------------------------------------------------------------------------------
; BLEND_END
;
; Row epilogue shared by all kernels: moves top, bottom and dst forward by
; their respective line sizes, decrements the row counter in end and jumps
; back to the kernel's .nextrow label while it is still positive; otherwise
; the function returns.
;------------------------------------------------------------------------------
%macro BLEND_END 0
    add           dstq, dst_linesizeq
    add        bottomq, bottom_linesizeq
    add           topq, top_linesizeq
    sub           endd, 1               ; must stay directly before the jg
    jg .nextrow
    RET
%endmacro
;------------------------------------------------------------------------------
; BLEND_SIMPLE name, op [, is_16bit = 0]
;
; Kernel for blend modes that map onto one packed instruction:
;     dst = p<op>(top, bottom)
; %1 name of the function (blend_<name>)
; %2 instruction name without its leading 'p' (xor, addusb, minuw, ...)
; %3 forwarded to BLEND_INIT as the 16-bit flag
;------------------------------------------------------------------------------
%macro BLEND_SIMPLE 2-3 0
BLEND_INIT %1, 2, %3
.nextrow:
    mov             xq, widthq          ; x = -width

.xloop:
    movu            m1, [bottomq + xq]
    movu            m0, [topq + xq]
    p%2             m0, m1              ; m0 = top <op> bottom
    movu   [dstq + xq], m0
    add             xq, mmsize          ; one full vector per iteration
    jl .xloop
BLEND_END
%endmacro
;------------------------------------------------------------------------------
; GRAINEXTRACT name, src, inter [, is_16bit = 0]
;
;     dst = saturate(bias + top - bottom)
; %1 name of the function
; %2 source element size suffix (b or w)
; %3 intermediate element size suffix (w or d)
; %4 1 for the 16-bit variant, left unset for 8-bit
; The bias is pw_128 for 8-bit and pd_32768 for 16-bit. The arithmetic is done
; on zero-extended elements and the result is packed back with unsigned
; saturation.
;------------------------------------------------------------------------------
%macro GRAINEXTRACT 3-4 0
BLEND_INIT %1, 6, %4
%if %4 ; 16 bit
    VBROADCASTI128  m5, [pd_32768]
%else
    VBROADCASTI128  m5, [pw_128]
%endif
    pxor            m4, m4              ; zero, interleaved in to widen elements
.nextrow:
    mov             xq, widthq
.xloop:
    movu            m3, [bottomq + xq]
    movu            m1, [topq + xq]

    ; Widen: m2/m3 = bottom low/high half, m0/m1 = top low/high half.
    punpckl%2%3     m2, m3, m4
    punpckh%2%3     m3, m4
    punpckl%2%3     m0, m1, m4
    punpckh%2%3     m1, m4

    ; bias + top - bottom, low half then high half
    padd%3          m0, m5
    psub%3          m0, m2
    padd%3          m1, m5
    psub%3          m1, m3

    packus%3%2      m0, m1              ; narrow with unsigned saturation

    movu   [dstq + xq], m0
    add             xq, mmsize
    jl .xloop
BLEND_END
%endmacro
;------------------------------------------------------------------------------
; MULTIPLY a, b, pw_1
;
; Word-wise a * b / 255 for 8-bit values held in 16-bit lanes, computed as
;     t = a * b + 1
;     a = (t + (t >> 8)) >> 8
; %1 a: first factor in, result out
; %2 b: second factor in, clobbered (left holding t >> 8)
; %3 register holding the word constant 1
;------------------------------------------------------------------------------
%macro MULTIPLY 3 ; a, b, pw_1
    pmullw          %1, %2              ; xxxxxxxx  a * b
    paddw           %1, %3              ;           t = a * b + 1
    psrlw           %2, %1, 8           ;           t >> 8
    paddw           %1, %2              ;           t + (t >> 8)
    psrlw           %1, 8               ; 00xx00xx  a * b / 255
%endmacro
;------------------------------------------------------------------------------
; SCREEN a, b, pw_1, pw_255
;
; Word-wise screen blend of 8-bit values held in 16-bit lanes:
;     a = 255 - (255 - a) * (255 - b) / 255
; XOR with 255 yields 255 - v for v in 0..255, so no subtraction is needed.
; %1 a: input and result
; %2 b: input, clobbered
; %3 register holding words of 1 (passed on to MULTIPLY)
; %4 register holding words of 255
;------------------------------------------------------------------------------
%macro SCREEN 4 ; a, b, pw_1, pw_255
    pxor            %2, %4              ; 00xx00xx  255 - b
    pxor            %1, %4              ; 00xx00xx  255 - a
    MULTIPLY        %1, %2, %3
    pxor            %1, %4              ; 00xx00xx  255 - x / 255
%endmacro
;------------------------------------------------------------------------------
; BLEND_MULTIPLY
;
; Defines blend_multiply for 8-bit data: dst = top * bottom / 255.
; Bytes are zero-extended to words, run through MULTIPLY and packed back.
;------------------------------------------------------------------------------
%macro BLEND_MULTIPLY 0
BLEND_INIT multiply, 6
    VBROADCASTI128  m5, [pw_1]          ; rounding term for MULTIPLY
    pxor            m4, m4              ; zero for byte -> word widening
.nextrow:
    mov             xq, widthq

.xloop:
    movu            m3, [bottomq + xq]
    movu            m1, [topq + xq]
    punpcklbw       m2, m3, m4          ; bottom, low half
    punpckhbw       m3, m4              ; bottom, high half
    punpcklbw       m0, m1, m4          ; top, low half
    punpckhbw       m1, m4              ; top, high half

    MULTIPLY        m1, m3, m5          ; high half (m3 is clobbered)
    MULTIPLY        m0, m2, m5          ; low half  (m2 is clobbered)

    packuswb        m0, m1
    movu   [dstq + xq], m0
    add             xq, mmsize
    jl .xloop
BLEND_END
%endmacro
;------------------------------------------------------------------------------
; BLEND_SCREEN
;
; Defines blend_screen for 8-bit data:
;     dst = 255 - (255 - top) * (255 - bottom) / 255
; Bytes are zero-extended to words, run through SCREEN and packed back.
;------------------------------------------------------------------------------
%macro BLEND_SCREEN 0
BLEND_INIT screen, 7
    VBROADCASTI128  m6, [pw_255]        ; complement mask for SCREEN
    VBROADCASTI128  m5, [pw_1]          ; rounding term for MULTIPLY
    pxor            m4, m4              ; zero for byte -> word widening
.nextrow:
    mov             xq, widthq

.xloop:
    movu            m3, [bottomq + xq]
    movu            m1, [topq + xq]
    punpcklbw       m2, m3, m4          ; bottom, low half
    punpckhbw       m3, m4              ; bottom, high half
    punpcklbw       m0, m1, m4          ; top, low half
    punpckhbw       m1, m4              ; top, high half

    SCREEN          m1, m3, m5, m6      ; high half (m3 is clobbered)
    SCREEN          m0, m2, m5, m6      ; low half  (m2 is clobbered)

    packuswb        m0, m1
    movu   [dstq + xq], m0
    add             xq, mmsize
    jl .xloop
BLEND_END
%endmacro
;------------------------------------------------------------------------------
; AVERAGE name, size [, is_16bit = 0]
;
;     dst = (top + bottom) >> 1
; %1 name of the function
; %2 element size suffix for pavg (b or w)
; %3 set for the 16-bit variant
; pavg rounds its result up; complementing both inputs and the output turns
; that into an average that rounds down.
;------------------------------------------------------------------------------
%macro AVERAGE 2-3 0
BLEND_INIT %1, 3, %3
    pcmpeqb         m2, m2              ; all bits set: complement mask

.nextrow:
    mov             xq, widthq

.xloop:
    movu            m1, [bottomq + xq]
    movu            m0, [topq + xq]
    pxor            m1, m2              ; ~bottom
    pxor            m0, m2              ; ~top
    pavg%2          m0, m1
    pxor            m0, m2              ; complement back
    movu   [dstq + xq], m0
    add             xq, mmsize
    jl .xloop
BLEND_END
%endmacro
;------------------------------------------------------------------------------
; GRAINMERGE name, src, inter [, is_16bit = 0]
;
;     dst = saturate(top + bottom - bias)
; %1 name of the function
; %2 source element size suffix (b or w)
; %3 intermediate element size suffix (w or d)
; %4 1 for the 16-bit variant, left unset for 8-bit
; The bias is pw_128 for 8-bit and pd_32768 for 16-bit. The arithmetic is done
; on zero-extended elements and the result is packed back with unsigned
; saturation.
;------------------------------------------------------------------------------
%macro GRAINMERGE 3-4 0
BLEND_INIT %1, 6, %4
%if %4 ; 16 bit
    VBROADCASTI128  m5, [pd_32768]
%else
    VBROADCASTI128  m5, [pw_128]
%endif
    pxor            m4, m4              ; zero, interleaved in to widen elements
.nextrow:
    mov             xq, widthq

.xloop:
    movu            m3, [bottomq + xq]
    movu            m1, [topq + xq]

    ; Widen: m2/m3 = bottom low/high half, m0/m1 = top low/high half.
    punpckl%2%3     m2, m3, m4
    punpckh%2%3     m3, m4
    punpckl%2%3     m0, m1, m4
    punpckh%2%3     m1, m4

    ; top + bottom - bias, low half then high half
    padd%3          m0, m2
    psub%3          m0, m5
    padd%3          m1, m3
    psub%3          m1, m5

    packus%3%2      m0, m1              ; narrow with unsigned saturation

    movu   [dstq + xq], m0
    add             xq, mmsize
    jl .xloop
BLEND_END
%endmacro
;------------------------------------------------------------------------------
; HARDMIX
;
; Defines blend_hardmix for 8-bit data:
;     dst = (top < 255 - bottom) ? 0 : 255
; pcmpgtb compares signed bytes, so both operands are shifted into signed
; order first: top ^ 128 for top, and bottom ^ 127, which is the same as
; (255 - bottom) ^ 128, for the complemented bottom. The compare mask is then
; inverted.
;------------------------------------------------------------------------------
%macro HARDMIX 0
BLEND_INIT hardmix, 5
    VBROADCASTI128  m4, [pb_127]
    VBROADCASTI128  m3, [pb_128]
    VBROADCASTI128  m2, [pb_255]
.nextrow:
    mov             xq, widthq

.xloop:
    movu            m1, [bottomq + xq]
    movu            m0, [topq + xq]
    pxor            m0, m3              ; top in signed order
    pxor            m1, m4              ; 255 - bottom in signed order
    pcmpgtb         m1, m0              ; all ones where 255 - bottom > top
    pxor            m1, m2              ; invert the mask
    movu   [dstq + xq], m1
    add             xq, mmsize
    jl .xloop
BLEND_END
%endmacro
;------------------------------------------------------------------------------
; DIVIDE
;
; Defines blend_divide for 8-bit data:
;     dst = min(top / bottom * 255, 255)
; The division is done in single-precision float. Each iteration loads only
; four bytes per source (movd) and advances x by mmsize / 4.
;------------------------------------------------------------------------------
%macro DIVIDE 0
BLEND_INIT divide, 4
    mova            m3, [ps_255]        ; 255.0: scale factor and clamp
    pxor            m2, m2              ; zero for widening
.nextrow:
    mov             xq, widthq

.xloop:
    movd            m1, [bottomq + xq]  ; 000000xx
    movd            m0, [topq + xq]
    punpcklbw       m1, m2              ; 00000x0x
    punpcklwd       m1, m2              ; 000x000x
    punpcklbw       m0, m2
    punpcklwd       m0, m2

    cvtdq2ps        m1, m1
    cvtdq2ps        m0, m0
    divps           m0, m1              ; a / b
    mulps           m0, m3              ; a / b * 255
    minps           m0, m3              ; clamp to 255
    cvttps2dq       m0, m0              ; truncate back to integers

    packssdw        m0, m0              ; 00000x0x
    packuswb        m0, m0              ; 000000xx
    movd   [dstq + xq], m0
    add             xq, mmsize / 4

    jl .xloop
BLEND_END
%endmacro
;------------------------------------------------------------------------------
; PHOENIX name, size [, is_16bit = 0]
;
;     dst = all_ones - max(top, bottom) + min(top, bottom)
; %1 name of the function
; %2 element size suffix (b or w)
; %3 (optional) 1 for the 16-bit variant
; pb_255 has every bit set, so the same constant serves as the all-ones value
; for both byte and word elements. The subtract and add both saturate
; (unsigned).
;------------------------------------------------------------------------------
%macro PHOENIX 2-3 0
BLEND_INIT %1, 4, %3
    VBROADCASTI128  m3, [pb_255]
.nextrow:
    mov             xq, widthq

.xloop:
    movu            m1, [bottomq + xq]
    movu            m0, [topq + xq]
    mova            m2, m1              ; keep bottom for the min below
    pmaxu%2         m1, m0              ; m1 = max(top, bottom)
    pminu%2         m0, m2              ; m0 = min(top, bottom)
    mova            m2, m3
    psubus%2        m2, m1              ; all_ones - max
    paddus%2        m2, m0              ;          + min
    movu   [dstq + xq], m2
    add             xq, mmsize
    jl .xloop
BLEND_END
%endmacro
;------------------------------------------------------------------------------
; DIFFERENCE name, src, inter [, is_16bit = 0]
;
;     dst = |top - bottom|
; %1 name of the function
; %2 source element size suffix (b or w)
; %3 intermediate element size suffix (w or d)
; %4 1 for the 16-bit variant, left unset for 8-bit
; Elements are zero-extended so the subtraction cannot wrap. The absolute
; value is taken with pabsd (16-bit) or the ABS2 helper (8-bit).
;------------------------------------------------------------------------------
%macro DIFFERENCE 3-4 0
BLEND_INIT %1, 5, %4
    pxor            m2, m2              ; zero, interleaved in to widen elements
.nextrow:
    mov             xq, widthq

.xloop:
    movu            m1, [bottomq + xq]
    movu            m0, [topq + xq]
    ; Widen: m4/m1 = bottom high/low half, m3/m0 = top high/low half.
    punpckh%2%3     m4, m1, m2
    punpckl%2%3     m1, m2
    punpckh%2%3     m3, m0, m2
    punpckl%2%3     m0, m2
    psub%3          m3, m4              ; high: top - bottom
    psub%3          m0, m1              ; low:  top - bottom
%if %4 ; 16 bit
    pabsd           m3, m3
    pabsd           m0, m0
%else
    ABS2            m0, m3, m1, m4      ; m1 / m4 are passed as scratch
%endif
    packus%3%2      m0, m3
    movu   [dstq + xq], m0
    add             xq, mmsize
    jl .xloop
BLEND_END
%endmacro
;------------------------------------------------------------------------------
; EXTREMITY name, src, inter [, is_16bit = 0]
;
;     dst = |max - top - bottom|
; %1 name of the function
; %2 source element size suffix (b or w)
; %3 intermediate element size suffix (w or d)
; %4 1 for the 16-bit variant, left unset for 8-bit
; max is 255 (pw_255) for 8-bit and 65535 (pd_65535) for 16-bit.
;------------------------------------------------------------------------------
%macro EXTREMITY 3-4 0
BLEND_INIT %1, 8, %4
%if %4 ; 16 bit
    VBROADCASTI128  m4, [pd_65535]
%else
    VBROADCASTI128  m4, [pw_255]
%endif
    pxor            m2, m2              ; zero, interleaved in to widen elements
.nextrow:
    mov             xq, widthq

.xloop:
    movu            m1, [bottomq + xq]
    movu            m0, [topq + xq]
    ; Widen: m6/m1 = bottom high/low half, m5/m0 = top high/low half.
    punpckh%2%3     m6, m1, m2
    punpckl%2%3     m1, m2
    punpckh%2%3     m5, m0, m2
    punpckl%2%3     m0, m2
    ; high half: max - top - bottom
    psub%3          m7, m4, m5
    psub%3          m7, m6
    ; low half: max - top - bottom
    psub%3          m3, m4, m0
    psub%3          m3, m1
%if %4 ; 16 bit
    pabsd           m7, m7
    pabsd           m3, m3
%else
    ABS2            m3, m7, m1, m6      ; m1 / m6 are passed as scratch
%endif
    packus%3%2      m3, m7
    movu   [dstq + xq], m3
    add             xq, mmsize
    jl .xloop
BLEND_END
%endmacro
;------------------------------------------------------------------------------
; NEGATION name, src, inter [, is_16bit = 0]
;
;     dst = max - |max - top - bottom|
; %1 name of the function
; %2 source element size suffix (b or w)
; %3 intermediate element size suffix (w or d)
; %4 1 for the 16-bit variant, left unset for 8-bit
; max is 255 (pw_255) for 8-bit and 65535 (pd_65535) for 16-bit.
;------------------------------------------------------------------------------
%macro NEGATION 3-4 0
BLEND_INIT %1, 8, %4
%if %4 ; 16 bit
    VBROADCASTI128  m4, [pd_65535]
%else
    VBROADCASTI128  m4, [pw_255]
%endif
    pxor            m2, m2              ; zero, interleaved in to widen elements
.nextrow:
    mov             xq, widthq

.xloop:
    movu            m1, [bottomq + xq]
    movu            m0, [topq + xq]
    ; Widen: m6/m1 = bottom high/low half, m5/m0 = top high/low half.
    punpckh%2%3     m6, m1, m2
    punpckl%2%3     m1, m2
    punpckh%2%3     m5, m0, m2
    punpckl%2%3     m0, m2
    ; high half: max - top - bottom
    psub%3          m7, m4, m5
    psub%3          m7, m6
    ; low half: max - top - bottom
    psub%3          m3, m4, m0
    psub%3          m3, m1
%if %4 ; 16 bit
    pabsd           m7, m7
    pabsd           m3, m3
%else
    ABS2            m3, m7, m1, m6      ; m1 / m6 are passed as scratch
%endif
    ; max - |...|; m0 / m1 are free again and take the two halves
    psub%3          m1, m4, m7          ; high half
    psub%3          m0, m4, m3          ; low half
    packus%3%2      m0, m1
    movu   [dstq + xq], m0
    add             xq, mmsize
    jl .xloop
BLEND_END
%endmacro
;------------------------------------------------------------------------------
; Kernel instantiations.
;
; Each INIT_* line selects the vector width and instruction set for the
; macros that follow it. All *_16 kernels are guarded by ARCH_X86_64 because
; BLEND_INIT's 16-bit path does not assemble on x86_32.
;------------------------------------------------------------------------------

; ---- SSE2, 8-bit ----
INIT_XMM sse2
BLEND_SIMPLE xor,          xor
BLEND_SIMPLE or,           or
BLEND_SIMPLE and,          and
BLEND_SIMPLE addition,     addusb
BLEND_SIMPLE subtract,     subusb
BLEND_SIMPLE darken,       minub
BLEND_SIMPLE lighten,      maxub
GRAINEXTRACT grainextract, b, w
BLEND_MULTIPLY
BLEND_SCREEN
AVERAGE      average,      b
GRAINMERGE   grainmerge,   b, w
HARDMIX
PHOENIX      phoenix,      b
DIFFERENCE   difference,   b, w
DIVIDE
EXTREMITY    extremity,    b, w
NEGATION     negation,     b, w

; ---- SSE2, 16-bit ----
%if ARCH_X86_64
BLEND_SIMPLE addition_16,  addusw, 1
BLEND_SIMPLE and_16,       and,    1
BLEND_SIMPLE or_16,        or,     1
BLEND_SIMPLE subtract_16,  subusw, 1
BLEND_SIMPLE xor_16,       xor,    1
AVERAGE      average_16,   w,      1
%endif

; ---- SSSE3, 8-bit: the kernels that take an absolute value ----
INIT_XMM ssse3
DIFFERENCE   difference,   b, w
EXTREMITY    extremity,    b, w
NEGATION     negation,     b, w

; ---- SSE4, 16-bit ----
INIT_XMM sse4
%if ARCH_X86_64
BLEND_SIMPLE darken_16,       minuw, 1
BLEND_SIMPLE lighten_16,      maxuw, 1
GRAINEXTRACT grainextract_16, w, d,  1
GRAINMERGE   grainmerge_16,   w, d,  1
PHOENIX      phoenix_16,      w,     1
DIFFERENCE   difference_16,   w, d,  1
EXTREMITY    extremity_16,    w, d,  1
NEGATION     negation_16,     w, d,  1
%endif

; ---- AVX2 ----
%if HAVE_AVX2_EXTERNAL
INIT_YMM avx2
; 8-bit (DIVIDE has no AVX2 version)
BLEND_SIMPLE xor,          xor
BLEND_SIMPLE or,           or
BLEND_SIMPLE and,          and
BLEND_SIMPLE addition,     addusb
BLEND_SIMPLE subtract,     subusb
BLEND_SIMPLE darken,       minub
BLEND_SIMPLE lighten,      maxub
GRAINEXTRACT grainextract, b, w
BLEND_MULTIPLY
BLEND_SCREEN
AVERAGE      average,      b
GRAINMERGE   grainmerge,   b, w
HARDMIX
PHOENIX      phoenix,      b

DIFFERENCE   difference,   b, w
EXTREMITY    extremity,    b, w
NEGATION     negation,     b, w

; 16-bit
%if ARCH_X86_64
BLEND_SIMPLE addition_16,     addusw, 1
BLEND_SIMPLE and_16,          and,    1
BLEND_SIMPLE darken_16,       minuw,  1
BLEND_SIMPLE lighten_16,      maxuw,  1
BLEND_SIMPLE or_16,           or,     1
BLEND_SIMPLE subtract_16,     subusw, 1
BLEND_SIMPLE xor_16,          xor,    1
GRAINEXTRACT grainextract_16, w, d,   1
AVERAGE      average_16,      w,      1
GRAINMERGE   grainmerge_16,   w, d,   1
PHOENIX      phoenix_16,      w,      1
DIFFERENCE   difference_16,   w, d,   1
EXTREMITY    extremity_16,    w, d,   1
NEGATION     negation_16,     w, d,   1
%endif
%endif