avcodec/jpegxl_parse{,r}: fix integer overflow for some malformed files
[FFMpeg-mirror.git] / libavfilter / x86 / vf_overlay.asm
blob8eecbbd2b2077a4c37b9b8cae811a034897b38b8
1 ;*****************************************************************************
2 ;* x86-optimized functions for overlay filter
3 ;*
4 ;* Copyright (C) 2018 Paul B Mahol
5 ;* Copyright (C) 2018 Henrik Gramner
6 ;*
7 ;* This file is part of FFmpeg.
8 ;*
9 ;* FFmpeg is free software; you can redistribute it and/or
10 ;* modify it under the terms of the GNU Lesser General Public
11 ;* License as published by the Free Software Foundation; either
12 ;* version 2.1 of the License, or (at your option) any later version.
14 ;* FFmpeg is distributed in the hope that it will be useful,
15 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
16 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 ;* Lesser General Public License for more details.
19 ;* You should have received a copy of the GNU Lesser General Public
20 ;* License along with FFmpeg; if not, write to the Free Software
21 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 ;*****************************************************************************
24 %include "libavutil/x86/x86util.asm"
SECTION_RODATA

; Packed constants shared by the overlay_row_* kernels in this file.
pb_1:   times 16 db 0x01        ; byte multiplier for pmaddubsw (adjacent-byte sums)
pw_128: times  8 dw 0x0080      ; bias added before the multiply-high by pw_257
pw_255: times  8 dw 0x00ff      ; XOR mask for the alpha words / low-byte mask for pandn
pw_257: times  8 dw 0x0101      ; pmulhuw multiplier

SECTION .text
INIT_XMM sse4
;------------------------------------------------------------------------------
; overlay_row_44(dst, da, s, a, w)
;
; Blends one row of 8-bit samples in place, mmsize/2 (8) samples per iteration:
;     dst[x] = ((s[x] * a[x] + dst[x] * (a[x] ^ 255) + 128) * 257) >> 16
;
; In:    dst = destination samples (read and written)
;        da  = not referenced by this function
;        s   = source samples
;        a   = alpha samples, one byte per source sample
;        w   = row width (32-bit, sign-extended when needed)
; Out:   eax = number of samples processed: w rounded down to a multiple of
;              mmsize/2, or 0 when w < mmsize/2. Remaining samples are left
;              untouched.
; Temps: r (width remainder), x (sample index), m0-m5
;------------------------------------------------------------------------------
cglobal overlay_row_44, 5, 7, 6, 0, dst, da, s, a, w, r, x
    movsxdifnidn wq, wd
    xor          xd, xd                  ; x = 0; also the return value on early exit
    cmp          wq, mmsize/2
    jl           .done                   ; less than one full vector: nothing to do
    mov          rq, wq
    and          rq, mmsize/2 - 1        ; r = w % (mmsize/2)
    sub          wq, rq                  ; w = largest multiple of mmsize/2 <= w
    mova         m5, [pw_257]
    mova         m4, [pw_128]
    mova         m3, [pw_255]

.blend:
    pmovzxbw     m2, [aq+xq]             ; m2 = alpha, widened to words
    pmovzxbw     m0, [sq+xq]             ; m0 = source
    pmovzxbw     m1, [dstq+xq]           ; m1 = destination
    pmullw       m0, m2                  ; m0 = s * alpha
    pxor         m2, m3                  ; m2 = alpha ^ 255 (= 255 - alpha for 0..255)
    pmullw       m1, m2                  ; m1 = dst * (255 - alpha)
    paddw        m0, m1
    paddw        m0, m4                  ; + 128
    pmulhuw      m0, m5                  ; (sum * 257) >> 16
    packuswb     m0, m0                  ; back to bytes
    movq         [dstq+xq], m0           ; store 8 blended samples
    add          xq, mmsize/2
    cmp          xq, wq
    jl           .blend

.done:
    mov          eax, xd                 ; return the number of samples handled
    RET
INIT_XMM sse4
;------------------------------------------------------------------------------
; overlay_row_22(dst, da, s, a, w)
;
; Blends one row of 8-bit samples in place, mmsize/2 (8) samples per iteration.
; Each output sample consumes two adjacent alpha bytes (a[2x], a[2x+1]); the
; effective alpha is avg(avg(a[2x+1], a[2x]), a[2x]) using pavgw rounding.
;     dst[x] = ((s[x] * alpha + dst[x] * (alpha ^ 255) + 128) * 257) >> 16
;
; In:    dst = destination samples (read and written)
;        da  = not referenced by this function
;        s   = source samples
;        a   = alpha samples, two bytes per source sample
;        w   = row width (32-bit, sign-extended when needed)
; Out:   eax = number of samples processed: (w - 1) rounded down to a multiple
;              of mmsize/2, or 0 when w - 1 < mmsize/2.
; Temps: r (width remainder), x (sample index), m0-m5
;------------------------------------------------------------------------------
cglobal overlay_row_22, 5, 7, 6, 0, dst, da, s, a, w, r, x
    xor          xq, xq                  ; x = 0; also the return value on early exit
    movsxdifnidn wq, wd
    ; NOTE(review): the width is reduced by one before vectorising, presumably
    ; so the last sample is left to the caller - confirm against the C side.
    sub          wq, 1
    mov          rq, wq
    and          rq, mmsize/2 - 1        ; r = (w - 1) % (mmsize/2)
    cmp          wq, mmsize/2
    jl           .end                    ; less than one full vector: nothing to do
    sub          wq, rq                  ; w = largest multiple of mmsize/2 <= w - 1
    mova         m3, [pw_255]
    mova         m4, [pw_128]
    mova         m5, [pw_257]

.loop:
    pmovzxbw     m0, [sq+xq]             ; m0 = source, widened to words
    movu         m1, [aq+2*xq]           ; 16 alpha bytes = 8 horizontal pairs
    pandn        m2, m3, m1              ; m2 = odd byte of each pair, in the high byte
    psllw        m1, 8                   ; m1 = even byte of each pair, in the high byte
    pavgw        m2, m1                  ; avg(odd, even)
    pavgw        m2, m1                  ; avg(previous, even)
    psrlw        m2, 8                   ; back to the 0..255 range
    pmovzxbw     m1, [dstq+xq]           ; m1 = destination
    pmullw       m0, m2                  ; m0 = s * alpha
    pxor         m2, m3                  ; m2 = alpha ^ 255 (= 255 - alpha for 0..255)
    pmullw       m1, m2                  ; m1 = dst * (255 - alpha)
    paddw        m0, m4                  ; + 128
    paddw        m0, m1
    pmulhuw      m0, m5                  ; (sum * 257) >> 16
    packuswb     m0, m0                  ; back to bytes
    movq         [dstq+xq], m0           ; store 8 blended samples
    add          xq, mmsize/2
    cmp          xq, wq
    jl           .loop

.end:
    mov          eax, xd                 ; return the number of samples handled
    RET                                  ; epilogue + return; must not run on into the next function
INIT_XMM sse4
;------------------------------------------------------------------------------
; overlay_row_20(dst, da, s, a, w, r)
;
; Blends one row of 8-bit samples in place, mmsize/2 (8) samples per iteration.
; Each output sample consumes a 2x2 block of alpha bytes: two adjacent bytes at
; a + 2x and two at (a + r) + 2x; the effective alpha is their sum >> 2.
;     dst[x] = ((s[x] * alpha + dst[x] * (alpha ^ 255) + 128) * 257) >> 16
;
; In:    dst = destination samples (read and written)
;        da  = incoming value ignored; overwritten with a + r
;        s   = source samples
;        a   = first alpha row, two bytes per source sample
;        w   = row width (32-bit, sign-extended when needed)
;        r   = byte offset from a to the second alpha row (presumably the
;              alpha line stride - confirm against the caller); the register
;              is reused as scratch afterwards
; Out:   eax = number of samples processed: (w - 1) rounded down to a multiple
;              of mmsize/2, or 0 when w - 1 < mmsize/2.
; Temps: x (sample index), m0-m6
;------------------------------------------------------------------------------
cglobal overlay_row_20, 6, 7, 7, 0, dst, da, s, a, w, r, x
    mov          daq, aq
    add          daq, rmp                ; da = second alpha row = a + r
    xor          xq, xq                  ; x = 0; also the return value on early exit
    movsxdifnidn wq, wd
    ; NOTE(review): the width is reduced by one before vectorising, presumably
    ; so the last sample is left to the caller - confirm against the C side.
    sub          wq, 1
    mov          rq, wq                  ; r is now free: reuse it for the remainder
    and          rq, mmsize/2 - 1        ; r = (w - 1) % (mmsize/2)
    cmp          wq, mmsize/2
    jl           .end                    ; less than one full vector: nothing to do
    sub          wq, rq                  ; w = largest multiple of mmsize/2 <= w - 1
    mova         m3, [pw_255]
    mova         m4, [pw_128]
    mova         m5, [pw_257]
    mova         m6, [pb_1]

.loop:
    pmovzxbw     m0, [sq+xq]             ; m0 = source, widened to words
    movu         m2, [aq+2*xq]           ; 16 alpha bytes from the first row
    movu         m1, [daq+2*xq]          ; 16 alpha bytes from the second row
    pmaddubsw    m2, m6                  ; sum each adjacent byte pair (x1 multiplier)
    pmaddubsw    m1, m6
    paddw        m2, m1                  ; sum of the 2x2 block
    psrlw        m2, 2                   ; / 4
    pmovzxbw     m1, [dstq+xq]           ; m1 = destination
    pmullw       m0, m2                  ; m0 = s * alpha
    pxor         m2, m3                  ; m2 = alpha ^ 255 (= 255 - alpha for 0..255)
    pmullw       m1, m2                  ; m1 = dst * (255 - alpha)
    paddw        m0, m4                  ; + 128
    paddw        m0, m1
    pmulhuw      m0, m5                  ; (sum * 257) >> 16
    packuswb     m0, m0                  ; back to bytes
    movq         [dstq+xq], m0           ; store 8 blended samples
    add          xq, mmsize/2
    cmp          xq, wq
    jl           .loop

.end:
    mov          eax, xd                 ; return the number of samples handled
    RET                                  ; epilogue + return