avformat/mov: check for tts_count before dereferencing tts_data
[FFMpeg-mirror.git] / libswscale / aarch64 / range_convert_neon.S
blobc0eb7143339c480e75e7e0b0dc7ed0a96f2af889
1 /*
2  * Copyright (c) 2024 Ramiro Polla
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
21 #include "libavutil/aarch64/asm.S"
// lumConvertRange fromto, bit_depth
//
// Emits ff_lumRange<fromto>Jpeg<bit_depth>_neon, an in-place range conversion
// of one plane: every sample becomes (sample * coeff + offset) >> shift.
//
//   fromto     To or From. The To variants additionally bound the result
//              (explicit smin on the 16 path, saturating narrow otherwise).
//   bit_depth  16 selects the path working on 32-bit samples with 64-bit
//              intermediates and shift 18; any other value selects the path
//              working on 16-bit samples with 32-bit intermediates and
//              shift 14.
//
// Each loop iteration handles 8 samples. The loop is bottom-tested (width is
// decremented by 8 and the loop repeats while the result is > 0), so at least
// 8 samples are always read and written, and width is effectively rounded up
// to a multiple of 8.
// Vector registers used: v0, v1, v16-v20, v22, v24-v26.
.macro lumConvertRange fromto, bit_depth
function ff_lumRange\fromto\()Jpeg\bit_depth\()_neon, export=1
// x0  int16_t *dst      (read and written in place)
// w1  int width
// w2  uint32_t coeff
// x3  int64_t offset
.if \bit_depth == 16
        // This path accesses dst as 32-bit lanes (8 samples = 32 bytes).
.ifc \fromto, To
        // v24 = (8 << 16) - 1 in every lane: upper bound for the results.
        // v25 only holds the constant 1 temporarily; it is reloaded below.
        movi            v25.4s, #1
        movi            v24.4s, #1<<3, lsl #16
        sub             v24.4s, v24.4s, v25.4s
.endif
        // Loop-invariant operands.
        dup             v25.4s, w2              // coeff in every 32-bit lane
        dup             v26.2d, x3              // offset in both 64-bit lanes
1:
        ld1             {v0.4s, v1.4s}, [x0]    // load 8 samples, x0 unchanged
        // Seed four 64-bit accumulator pairs with offset.
        mov             v16.16b, v26.16b
        mov             v17.16b, v26.16b
        mov             v18.16b, v26.16b
        mov             v19.16b, v26.16b
        // acc += sample * coeff (signed 32x32 -> 64 bit).
        smlal           v16.2d, v0.2s, v25.2s
        smlal2          v17.2d, v0.4s, v25.4s
        smlal           v18.2d, v1.2s, v25.2s
        smlal2          v19.2d, v1.4s, v25.4s
        // Shift right by 18 and narrow back to 32 bit (no saturation).
        shrn            v0.2s, v16.2d, 18
        shrn2           v0.4s, v17.2d, 18
        shrn            v1.2s, v18.2d, 18
        shrn2           v1.4s, v19.2d, 18
        subs            w1, w1, #8              // width -= 8; flags feed b.gt
.ifc \fromto, To
        // result = min(result, v24)
        smin            v0.4s, v0.4s, v24.4s
        smin            v1.4s, v1.4s, v24.4s
.endif
        st1             {v0.4s, v1.4s}, [x0], #32   // store back, dst += 32 bytes
        b.gt            1b                      // repeat while width > 0
.else
        // Loop-invariant operands. Only the low 32 bits of offset (w3) are
        // used on this path.
        dup             v25.4s, w2              // coeff in every 32-bit lane
        dup             v26.4s, w3              // offset in every 32-bit lane
1:
        ld1             {v0.8h}, [x0]           // load 8 samples, x0 unchanged
        // Seed two 32-bit accumulator vectors with offset.
        mov             v16.16b, v26.16b
        mov             v18.16b, v26.16b
        // Sign-extend the samples to 32 bit.
        sxtl            v20.4s, v0.4h
        sxtl2           v22.4s, v0.8h
        // acc += sample * coeff (32-bit lanes).
        mla             v16.4s, v20.4s, v25.4s
        mla             v18.4s, v22.4s, v25.4s
.ifc \fromto, To
        // Shift right by 14 and narrow to 16 bit with signed saturation.
        sqshrn          v0.4h, v16.4s, 14
        sqshrn2         v0.8h, v18.4s, 14
.else
        // Shift right by 14 and narrow to 16 bit without saturation.
        shrn            v0.4h, v16.4s, 14
        shrn2           v0.8h, v18.4s, 14
.endif
        subs            w1, w1, #8              // width -= 8; flags feed b.gt
        st1             {v0.8h}, [x0], #16      // store back, dst += 16 bytes
        b.gt            1b                      // repeat while width > 0
.endif
        ret
endfunc
.endm
// chrConvertRange fromto, bit_depth
//
// Emits ff_chrRange<fromto>Jpeg<bit_depth>_neon, an in-place range conversion
// of two planes (dstU and dstV) with the same coeff and offset: every sample
// becomes (sample * coeff + offset) >> shift.
//
//   fromto     To or From. The To variants additionally bound the result
//              (explicit smin on the 16 path, saturating narrow otherwise).
//   bit_depth  16 selects the path working on 32-bit samples with 64-bit
//              intermediates and shift 18; any other value selects the path
//              working on 16-bit samples with 32-bit intermediates and
//              shift 14.
//
// Each loop iteration handles 8 samples of each plane. The loop is
// bottom-tested (width is decremented by 8 and the loop repeats while the
// result is > 0), so at least 8 samples per plane are always read and
// written, and width is effectively rounded up to a multiple of 8.
// Vector registers used: v0-v3, v16-v26.
.macro chrConvertRange fromto, bit_depth
function ff_chrRange\fromto\()Jpeg\bit_depth\()_neon, export=1
// x0  int16_t *dstU     (read and written in place)
// x1  int16_t *dstV     (read and written in place)
// w2  int width
// w3  uint32_t coeff
// x4  int64_t offset
.if \bit_depth == 16
        // This path accesses both planes as 32-bit lanes (8 samples = 32 bytes).
.ifc \fromto, To
        // v24 = (8 << 16) - 1 in every lane: upper bound for the results.
        // v25 only holds the constant 1 temporarily; it is reloaded below.
        movi            v25.4s, #1
        movi            v24.4s, #1<<3, lsl #16
        sub             v24.4s, v24.4s, v25.4s
.endif
        // Loop-invariant operands.
        dup             v25.4s, w3              // coeff in every 32-bit lane
        dup             v26.2d, x4              // offset in both 64-bit lanes
1:
        ld1             {v0.4s, v1.4s}, [x0]    // 8 U samples, x0 unchanged
        ld1             {v2.4s, v3.4s}, [x1]    // 8 V samples, x1 unchanged
        // Seed eight 64-bit accumulator pairs with offset
        // (v16-v19 for U, v20-v23 for V).
        mov             v16.16b, v26.16b
        mov             v17.16b, v26.16b
        mov             v18.16b, v26.16b
        mov             v19.16b, v26.16b
        mov             v20.16b, v26.16b
        mov             v21.16b, v26.16b
        mov             v22.16b, v26.16b
        mov             v23.16b, v26.16b
        // acc += sample * coeff (signed 32x32 -> 64 bit).
        smlal           v16.2d, v0.2s, v25.2s
        smlal2          v17.2d, v0.4s, v25.4s
        smlal           v18.2d, v1.2s, v25.2s
        smlal2          v19.2d, v1.4s, v25.4s
        smlal           v20.2d, v2.2s, v25.2s
        smlal2          v21.2d, v2.4s, v25.4s
        smlal           v22.2d, v3.2s, v25.2s
        smlal2          v23.2d, v3.4s, v25.4s
        // Shift right by 18 and narrow back to 32 bit (no saturation).
        shrn            v0.2s, v16.2d, 18
        shrn2           v0.4s, v17.2d, 18
        shrn            v1.2s, v18.2d, 18
        shrn2           v1.4s, v19.2d, 18
        shrn            v2.2s, v20.2d, 18
        shrn2           v2.4s, v21.2d, 18
        shrn            v3.2s, v22.2d, 18
        shrn2           v3.4s, v23.2d, 18
        subs            w2, w2, #8              // width -= 8; flags feed b.gt
.ifc \fromto, To
        // result = min(result, v24)
        smin            v0.4s, v0.4s, v24.4s
        smin            v1.4s, v1.4s, v24.4s
        smin            v2.4s, v2.4s, v24.4s
        smin            v3.4s, v3.4s, v24.4s
.endif
        st1             {v0.4s, v1.4s}, [x0], #32   // store U, dstU += 32 bytes
        st1             {v2.4s, v3.4s}, [x1], #32   // store V, dstV += 32 bytes
        b.gt            1b                      // repeat while width > 0
.else
        // Loop-invariant operands. Only the low 32 bits of offset (w4) are
        // used on this path.
        dup             v25.4s, w3              // coeff in every 32-bit lane
        dup             v26.4s, w4              // offset in every 32-bit lane
1:
        ld1             {v0.8h}, [x0]           // 8 U samples, x0 unchanged
        ld1             {v1.8h}, [x1]           // 8 V samples, x1 unchanged
        // Seed four 32-bit accumulator vectors with offset
        // (v16/v18 for U low/high, v17/v19 for V low/high).
        mov             v16.16b, v26.16b
        mov             v17.16b, v26.16b
        mov             v18.16b, v26.16b
        mov             v19.16b, v26.16b
        // Sign-extend the samples to 32 bit.
        sxtl            v20.4s, v0.4h
        sxtl            v21.4s, v1.4h
        sxtl2           v22.4s, v0.8h
        sxtl2           v23.4s, v1.8h
        // acc += sample * coeff (32-bit lanes).
        mla             v16.4s, v20.4s, v25.4s
        mla             v17.4s, v21.4s, v25.4s
        mla             v18.4s, v22.4s, v25.4s
        mla             v19.4s, v23.4s, v25.4s
.ifc \fromto, To
        // Shift right by 14 and narrow to 16 bit with signed saturation.
        sqshrn          v0.4h, v16.4s, 14
        sqshrn          v1.4h, v17.4s, 14
        sqshrn2         v0.8h, v18.4s, 14
        sqshrn2         v1.8h, v19.4s, 14
.else
        // Shift right by 14 and narrow to 16 bit without saturation.
        shrn            v0.4h, v16.4s, 14
        shrn            v1.4h, v17.4s, 14
        shrn2           v0.8h, v18.4s, 14
        shrn2           v1.8h, v19.4s, 14
.endif
        subs            w2, w2, #8              // width -= 8; flags feed b.gt
        st1             {v0.8h}, [x0], #16      // store U, dstU += 16 bytes
        st1             {v1.8h}, [x1], #16      // store V, dstV += 16 bytes
        b.gt            1b                      // repeat while width > 0
.endif
        ret
endfunc
.endm
// Instantiate all eight exported functions: for each direction (To first,
// then From) the luma and chroma converters, each in the 8 and 16 variants.
.irp fromto, To, From
        lumConvertRange \fromto, 8
        lumConvertRange \fromto, 16
        chrConvertRange \fromto, 8
        chrConvertRange \fromto, 16
.endr