avcodec/jpegxl_parse{,r}: fix integer overflow for some malformed files
[FFMpeg-mirror.git] / libavfilter / x86 / vf_nlmeans.asm
blob8f5780103513bfa6f63b86291a4987fe4095f5aa
1 ;*****************************************************************************
2 ;* x86-optimized functions for nlmeans filter
3 ;*
4 ;* This file is part of FFmpeg.
5 ;*
6 ;* FFmpeg is free software; you can redistribute it and/or
7 ;* modify it under the terms of the GNU Lesser General Public
8 ;* License as published by the Free Software Foundation; either
9 ;* version 2.1 of the License, or (at your option) any later version.
11 ;* FFmpeg is distributed in the hope that it will be useful,
12 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
13 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 ;* Lesser General Public License for more details.
16 ;* You should have received a copy of the GNU Lesser General Public
17 ;* License along with FFmpeg; if not, write to the Free Software
18 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 ;******************************************************************************
22 %include "libavutil/x86/x86util.asm"
24 %if HAVE_AVX2_EXTERNAL && ARCH_X86_64
26 SECTION_RODATA 32
; Tail masks used by compute_weights_line: 9 rows of 8 dwords (32 bytes per
; row). Row r consists of r zero dwords followed by (8 - r) dwords of -1, so
; row 0 is all ones and row 8 is all zeros.
ending_lut:
%assign elut_row 0
%rep 9
    times elut_row     dd  0
    times 8 - elut_row dd -1
    %assign elut_row elut_row + 1
%endrep
%undef elut_row
38 SECTION .text
;------------------------------------------------------------------------------
; void ff_compute_weights_line(const uint32_t *const iia,
;                              const uint32_t *const iib,
;                              const uint32_t *const iid,
;                              const uint32_t *const iie,
;                              const uint8_t *const src,
;                              float *total,
;                              float *sum,
;                              const float *const lut,
;                              int max,
;                              int startx, int endx);
;
; For every column x in [startx, endx):
;     idx       = min(iie[x] - iid[x] - iib[x] + iia[x], max)   (unsigned min)
;     total[x] += lut[idx]
;     sum[x]   += lut[idx] * src[x]
;
; Eight columns are processed per iteration (mmsize / 4 dwords per ymm).
; Nothing is done when startx >= endx.
;
; Tail handling: in the last iteration, lanes at or beyond endx get their
; index OR-ed with -1 (via ending_lut) and are therefore clamped to max, so
; they use lut[max]. Those lanes are still loaded from and stored back to
; total/sum, and the ii*/src loads are full width as well.
; NOTE(review): this presumably relies on the caller padding every buffer by
; up to 7 elements past endx and on lut[max] being 0.0 so the extra lanes are
; unchanged -- confirm against the C caller.
;
; Register roles:
;     x       current column
;     startx  reused inside the loop as the byte offset of the ending_lut row
;     mod     number of columns per iteration (8)
;     elut    address of ending_lut
;     m2      max broadcast to every lane
;     m4      all-ones, source for the gather mask
;------------------------------------------------------------------------------
INIT_YMM avx2
cglobal compute_weights_line, 8, 13, 5, 0, iia, iib, iid, iie, src, total, sum, lut, x, startx, endx, mod, elut
    movsxd       startxq, dword startxm
    movsxd         endxq, dword endxm

    ; The loop below is bottom-tested; bail out early on an empty range so
    ; that no element is read or written and ending_lut is never indexed
    ; with a non-positive row number.
    mov               xq, startxq
    cmp               xq, endxq
    jge .end

    VPBROADCASTD      m2, r8m                 ; m2 = max in every lane
    mov             modq, mmsize / 4          ; columns per iteration
    lea            elutq, [ending_lut]
    vpcmpeqd          m4, m4                  ; m4 = all ones

.loop:
    ; row = min(endx - x, 8); each ending_lut row is 32 bytes, hence << 5.
    ; Row 8 is all zeros (no lane masked), row r < 8 keeps the first r lanes.
    mov          startxq, endxq
    sub          startxq, xq
    cmp          startxq, modq
    cmovge       startxq, modq
    sal          startxq, 5

    ; m0 = iie[x] - iid[x] - iib[x] + iia[x]
    movu              m0, [iieq + xq * 4]
    psubd             m0, [iidq + xq * 4]
    psubd             m0, [iibq + xq * 4]
    paddd             m0, [iiaq + xq * 4]

    por               m0, [elutq + startxq]   ; lanes past endx -> 0xFFFFFFFF
    pminud            m0, m2                  ; clamp index to max (unsigned)
    pslld             m0, 2                   ; index -> byte offset into lut

    ; The gather clears its mask register, so a fresh all-ones copy is
    ; needed on every iteration.
    mova              m3, m4
    vgatherdps        m1, [lutq + m0], m3     ; m1 = lut[idx] (weights)

    pmovzxbd          m0, [srcq + xq]         ; 8 source bytes -> 8 dwords
    cvtdq2ps          m0, m0
    mulps             m0, m1                  ; m0 = weight * src[x]

    addps             m1, [totalq + xq * 4]
    addps             m0, [sumq + xq * 4]

    movups [totalq + xq * 4], m1
    movups   [sumq + xq * 4], m0

    add               xq, mmsize / 4
    cmp               xq, endxq
    jl .loop

.end:
    RET
97 %endif