/*
 * Copyright (c) 2024 Ramiro Polla
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
21 #include "libavutil/aarch64/asm.S"
23 .macro lumConvertRange fromto, bit_depth
24 function ff_lumRange\fromto\()Jpeg\bit_depth\()_neon, export=1
32 movi v24.4s, #1<<3, lsl #16
33 sub v24.4s, v24.4s, v25.4s
38 ld1 {v0.4s, v1.4s}, [x0]
43 smlal v16.2d, v0.2s, v25.2s
44 smlal2 v17.2d, v0.4s, v25.4s
45 smlal v18.2d, v1.2s, v25.2s
46 smlal2 v19.2d, v1.4s, v25.4s
47 shrn v0.2s, v16.2d, 18
48 shrn2 v0.4s, v17.2d, 18
49 shrn v1.2s, v18.2d, 18
50 shrn2 v1.4s, v19.2d, 18
53 smin v0.4s, v0.4s, v24.4s
54 smin v1.4s, v1.4s, v24.4s
56 st1 {v0.4s, v1.4s}, [x0], #32
67 mla v16.4s, v20.4s, v25.4s
68 mla v18.4s, v22.4s, v25.4s
70 sqshrn v0.4h, v16.4s, 14
71 sqshrn2 v0.8h, v18.4s, 14
73 shrn v0.4h, v16.4s, 14
74 shrn2 v0.8h, v18.4s, 14
77 st1 {v0.8h}, [x0], #16
// chrConvertRange fromto, bit_depth
// Macro that emits one exported function whose name is built from its two
// arguments: ff_chrRange<fromto>Jpeg<bit_depth>_neon. The visible code applies
// the same arithmetic to two buffers, addressed through x0 and x1.
// NOTE(review): this view of the macro has gaps. The conditional directives,
// loop labels and branches, the setup of v25, of the accumulators v16-v23 and
// of the mla sources v20-v23, the end of the function and the closing .endm
// are not visible here. Confirm the notes below against the complete file.
84 .macro chrConvertRange fromto, bit_depth
85 function ff_chrRange\fromto\()Jpeg\bit_depth\()_neon, export=1
// v24 = (8 << 16) - v25 in every 32-bit lane. v24 is the upper bound used by
// the smin instructions further down.
94 movi v24.4s, #1<<3, lsl #16
95 sub v24.4s, v24.4s, v25.4s
// Load eight 32-bit values from [x0] and eight from [x1]. Neither pointer is
// advanced by these loads.
100 ld1 {v0.4s, v1.4s}, [x0]
101 ld1 {v2.4s, v3.4s}, [x1]
// Signed widening multiply-accumulate: the 64-bit lanes of v16-v23 receive
// += (32-bit lanes of v0-v3) * (32-bit lanes of v25). The plain form uses the
// lower two source lanes, the "2" form the upper two.
110 smlal v16.2d, v0.2s, v25.2s
111 smlal2 v17.2d, v0.4s, v25.4s
112 smlal v18.2d, v1.2s, v25.2s
113 smlal2 v19.2d, v1.4s, v25.4s
114 smlal v20.2d, v2.2s, v25.2s
115 smlal2 v21.2d, v2.4s, v25.4s
116 smlal v22.2d, v3.2s, v25.2s
117 smlal2 v23.2d, v3.4s, v25.4s
// Shift each 64-bit accumulator right by 18 and narrow back to 32 bits,
// repacking the results into v0-v3.
118 shrn v0.2s, v16.2d, 18
119 shrn2 v0.4s, v17.2d, 18
120 shrn v1.2s, v18.2d, 18
121 shrn2 v1.4s, v19.2d, 18
122 shrn v2.2s, v20.2d, 18
123 shrn2 v2.4s, v21.2d, 18
124 shrn v3.2s, v22.2d, 18
125 shrn2 v3.4s, v23.2d, 18
// Clamp every lane to at most v24 (signed minimum).
128 smin v0.4s, v0.4s, v24.4s
129 smin v1.4s, v1.4s, v24.4s
130 smin v2.4s, v2.4s, v24.4s
131 smin v3.4s, v3.4s, v24.4s
// Store the results to [x0] and [x1] and advance each pointer by 32 bytes.
133 st1 {v0.4s, v1.4s}, [x0], #32
134 st1 {v2.4s, v3.4s}, [x1], #32
// Second code path, working on 32-bit accumulators and storing 16-bit lanes:
// v16-v19 += v20-v23 * v25, lane by lane.
// NOTE(review): presumably selected by the bit_depth argument; the selecting
// directive and the loads that fill v20-v23 are not visible here.
150 mla v16.4s, v20.4s, v25.4s
151 mla v17.4s, v21.4s, v25.4s
152 mla v18.4s, v22.4s, v25.4s
153 mla v19.4s, v23.4s, v25.4s
// Shift right by 14 and narrow to 16 bits with signed saturation.
// v16/v18 are packed into v0 and v17/v19 into v1.
155 sqshrn v0.4h, v16.4s, 14
156 sqshrn v1.4h, v17.4s, 14
157 sqshrn2 v0.8h, v18.4s, 14
158 sqshrn2 v1.8h, v19.4s, 14
// Same shift and narrowing without saturation, writing the same registers.
// NOTE(review): presumably the alternative branch of a conditional that is
// not visible here, since it would otherwise overwrite the results above.
160 shrn v0.4h, v16.4s, 14
161 shrn v1.4h, v17.4s, 14
162 shrn2 v0.8h, v18.4s, 14
163 shrn2 v1.8h, v19.4s, 14
// Store v0 to [x0] and v1 to [x1], advancing each pointer by 16 bytes.
166 st1 {v0.8h}, [x0], #16
167 st1 {v1.8h}, [x1], #16
// Instantiate both macros for each direction (To, From) and each bit depth
// (8, 16). Following the name templates in the macros, this produces eight
// exported functions:
//   ff_lumRangeToJpeg8_neon,   ff_lumRangeToJpeg16_neon,
//   ff_chrRangeToJpeg8_neon,   ff_chrRangeToJpeg16_neon,
//   ff_lumRangeFromJpeg8_neon, ff_lumRangeFromJpeg16_neon,
//   ff_chrRangeFromJpeg8_neon, ff_chrRangeFromJpeg16_neon
174 lumConvertRange To, 8
175 lumConvertRange To, 16
176 chrConvertRange To, 8
177 chrConvertRange To, 16
178 lumConvertRange From, 8
179 lumConvertRange From, 16
180 chrConvertRange From, 8
181 chrConvertRange From, 16