/*
 * Copyright (C) 2024 Niklas Haas
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
24 #include "libavutil/attributes.h"
25 #include "libavutil/avassert.h"
26 #include "libavutil/mem.h"
32 SwsLut3D
*ff_sws_lut3d_alloc(void)
34 SwsLut3D
*lut3d
= av_malloc(sizeof(*lut3d
));
38 lut3d
->dynamic
= false;
/*
 * Takes a pointer to the caller's SwsLut3D handle.
 *
 * NOTE(review): only the signature is visible in this extract; the body
 * (original source lines 43-45) is missing. Judging by the name this
 * presumably releases the context - confirm against the real source before
 * relying on it.
 */
42 void ff_sws_lut3d_free(SwsLut3D
**plut3d
)
47 bool ff_sws_lut3d_test_fmt(enum AVPixelFormat fmt
, int output
)
49 return fmt
== AV_PIX_FMT_RGBA64
;
52 enum AVPixelFormat
ff_sws_lut3d_pick_pixfmt(SwsFormat fmt
, int output
)
54 return AV_PIX_FMT_RGBA64
;
58 * v0 and v1 are 'black' and 'white'
59 * v2 and v3 are closest RGB/CMY vertices
60 * x >= y >= z are relative weights
62 static av_always_inline
63 v3u16_t
barycentric(int shift
, int x
, int y
, int z
,
64 v3u16_t v0
, v3u16_t v1
, v3u16_t v2
, v3u16_t v3
)
66 const int a
= (1 << shift
) - x
;
74 (a
* v0
.x
+ b
* v1
.x
+ c
* v2
.x
+ d
* v3
.x
) >> shift
,
75 (a
* v0
.y
+ b
* v1
.y
+ c
* v2
.y
+ d
* v3
.y
) >> shift
,
76 (a
* v0
.z
+ b
* v1
.z
+ c
* v2
.z
+ d
* v3
.z
) >> shift
,
80 static av_always_inline
81 v3u16_t
tetrahedral(const SwsLut3D
*lut3d
, int Rx
, int Gx
, int Bx
,
82 int Rf
, int Gf
, int Bf
)
84 const int shift
= 16 - INPUT_LUT_BITS
;
85 const int Rn
= FFMIN(Rx
+ 1, INPUT_LUT_SIZE
- 1);
86 const int Gn
= FFMIN(Gx
+ 1, INPUT_LUT_SIZE
- 1);
87 const int Bn
= FFMIN(Bx
+ 1, INPUT_LUT_SIZE
- 1);
89 const v3u16_t c000
= lut3d
->input
[Bx
][Gx
][Rx
];
90 const v3u16_t c111
= lut3d
->input
[Bn
][Gn
][Rn
];
93 const v3u16_t c100
= lut3d
->input
[Bx
][Gx
][Rn
];
94 const v3u16_t c110
= lut3d
->input
[Bx
][Gn
][Rn
];
95 return barycentric(shift
, Rf
, Gf
, Bf
, c000
, c100
, c110
, c111
);
97 const v3u16_t c100
= lut3d
->input
[Bx
][Gx
][Rn
];
98 const v3u16_t c101
= lut3d
->input
[Bn
][Gx
][Rn
];
99 return barycentric(shift
, Rf
, Bf
, Gf
, c000
, c100
, c101
, c111
);
101 const v3u16_t c001
= lut3d
->input
[Bn
][Gx
][Rx
];
102 const v3u16_t c101
= lut3d
->input
[Bn
][Gx
][Rn
];
103 return barycentric(shift
, Bf
, Rf
, Gf
, c000
, c001
, c101
, c111
);
107 const v3u16_t c001
= lut3d
->input
[Bn
][Gx
][Rx
];
108 const v3u16_t c011
= lut3d
->input
[Bn
][Gn
][Rx
];
109 return barycentric(shift
, Bf
, Gf
, Rf
, c000
, c001
, c011
, c111
);
110 } else if (Bf
> Rf
) {
111 const v3u16_t c010
= lut3d
->input
[Bx
][Gn
][Rx
];
112 const v3u16_t c011
= lut3d
->input
[Bn
][Gn
][Rx
];
113 return barycentric(shift
, Gf
, Bf
, Rf
, c000
, c010
, c011
, c111
);
115 const v3u16_t c010
= lut3d
->input
[Bx
][Gn
][Rx
];
116 const v3u16_t c110
= lut3d
->input
[Bx
][Gn
][Rn
];
117 return barycentric(shift
, Gf
, Rf
, Bf
, c000
, c010
, c110
, c111
);
122 static av_always_inline v3u16_t
lookup_input16(const SwsLut3D
*lut3d
, v3u16_t rgb
)
124 const int shift
= 16 - INPUT_LUT_BITS
;
125 const int Rx
= rgb
.x
>> shift
;
126 const int Gx
= rgb
.y
>> shift
;
127 const int Bx
= rgb
.z
>> shift
;
128 const int Rf
= rgb
.x
& ((1 << shift
) - 1);
129 const int Gf
= rgb
.y
& ((1 << shift
) - 1);
130 const int Bf
= rgb
.z
& ((1 << shift
) - 1);
131 return tetrahedral(lut3d
, Rx
, Gx
, Bx
, Rf
, Gf
, Bf
);
134 static av_always_inline v3u16_t
lookup_input8(const SwsLut3D
*lut3d
, v3u8_t rgb
)
136 static_assert(INPUT_LUT_BITS
<= 8, "INPUT_LUT_BITS must be <= 8");
137 const int shift
= 8 - INPUT_LUT_BITS
;
138 const int Rx
= rgb
.x
>> shift
;
139 const int Gx
= rgb
.y
>> shift
;
140 const int Bx
= rgb
.z
>> shift
;
141 const int Rf
= rgb
.x
& ((1 << shift
) - 1);
142 const int Gf
= rgb
.y
& ((1 << shift
) - 1);
143 const int Bf
= rgb
.z
& ((1 << shift
) - 1);
144 return tetrahedral(lut3d
, Rx
, Gx
, Bx
, Rf
, Gf
, Bf
);
/**
 * Note: These functions are scaled such that x == (1 << shift) corresponds to
 * a value of 1.0. This makes them suitable for use when interpolating LUT
 * entries with a fractional part that is just masked away from the index,
 * since a fractional coordinate of e.g. 0xFFFF corresponds to a mix weight of
 * just slightly *less* than 1.0.
 */
154 static av_always_inline v2u16_t
lerp2u16(v2u16_t a
, v2u16_t b
, int x
, int shift
)
156 const int xi
= (1 << shift
) - x
;
158 (a
.x
* xi
+ b
.x
* x
) >> shift
,
159 (a
.y
* xi
+ b
.y
* x
) >> shift
,
163 static av_always_inline v3u16_t
lerp3u16(v3u16_t a
, v3u16_t b
, int x
, int shift
)
165 const int xi
= (1 << shift
) - x
;
167 (a
.x
* xi
+ b
.x
* x
) >> shift
,
168 (a
.y
* xi
+ b
.y
* x
) >> shift
,
169 (a
.z
* xi
+ b
.z
* x
) >> shift
,
173 static av_always_inline v3u16_t
lookup_output(const SwsLut3D
*lut3d
, v3u16_t ipt
)
175 const int Ishift
= 16 - OUTPUT_LUT_BITS_I
;
176 const int Cshift
= 16 - OUTPUT_LUT_BITS_PT
;
177 const int Ix
= ipt
.x
>> Ishift
;
178 const int Px
= ipt
.y
>> Cshift
;
179 const int Tx
= ipt
.z
>> Cshift
;
180 const int If
= ipt
.x
& ((1 << Ishift
) - 1);
181 const int Pf
= ipt
.y
& ((1 << Cshift
) - 1);
182 const int Tf
= ipt
.z
& ((1 << Cshift
) - 1);
183 const int In
= FFMIN(Ix
+ 1, OUTPUT_LUT_SIZE_I
- 1);
184 const int Pn
= FFMIN(Px
+ 1, OUTPUT_LUT_SIZE_PT
- 1);
185 const int Tn
= FFMIN(Tx
+ 1, OUTPUT_LUT_SIZE_PT
- 1);
187 /* Trilinear interpolation */
188 const v3u16_t c000
= lut3d
->output
[Tx
][Px
][Ix
];
189 const v3u16_t c001
= lut3d
->output
[Tx
][Px
][In
];
190 const v3u16_t c010
= lut3d
->output
[Tx
][Pn
][Ix
];
191 const v3u16_t c011
= lut3d
->output
[Tx
][Pn
][In
];
192 const v3u16_t c100
= lut3d
->output
[Tn
][Px
][Ix
];
193 const v3u16_t c101
= lut3d
->output
[Tn
][Px
][In
];
194 const v3u16_t c110
= lut3d
->output
[Tn
][Pn
][Ix
];
195 const v3u16_t c111
= lut3d
->output
[Tn
][Pn
][In
];
196 const v3u16_t c00
= lerp3u16(c000
, c100
, Tf
, Cshift
);
197 const v3u16_t c10
= lerp3u16(c010
, c110
, Tf
, Cshift
);
198 const v3u16_t c01
= lerp3u16(c001
, c101
, Tf
, Cshift
);
199 const v3u16_t c11
= lerp3u16(c011
, c111
, Tf
, Cshift
);
200 const v3u16_t c0
= lerp3u16(c00
, c10
, Pf
, Cshift
);
201 const v3u16_t c1
= lerp3u16(c01
, c11
, Pf
, Cshift
);
202 const v3u16_t c
= lerp3u16(c0
, c1
, If
, Ishift
);
/*
 * Rescale the second and third components of `ipt` using the tone-map LUT.
 *
 * ipt.x is split into a tone LUT index (top TONE_LUT_BITS bits) and a
 * fractional part (remaining low bits); the two neighboring tone_map entries
 * are blended with lerp2u16(). The .y field of the blended entry is then used
 * as a fixed-point gain (divided by 1 << 15) on ipt.y and ipt.z.
 *
 * NOTE(review): this extract is incomplete - the embedded original line
 * numbers jump from 216 to 219, no return statement is visible, and w.x is
 * never used in the visible lines. Confirm the missing statements against the
 * real source before editing.
 */
206 static av_always_inline v3u16_t
apply_tone_map(const SwsLut3D
*lut3d
, v3u16_t ipt
)
208 const int shift
= 16 - TONE_LUT_BITS
;
209 const int Ix
= ipt
.x
>> shift
;
210 const int If
= ipt
.x
& ((1 << shift
) - 1);
/* Neighbor index, clamped to the last tone LUT entry */
211 const int In
= FFMIN(Ix
+ 1, TONE_LUT_SIZE
- 1);
213 const v2u16_t w0
= lut3d
->tone_map
[Ix
];
214 const v2u16_t w1
= lut3d
->tone_map
[In
];
215 const v2u16_t w
= lerp2u16(w0
, w1
, If
, shift
);
/* Offset chosen so that a component equal to (1 << 15) maps to itself */
216 const int base
= (1 << 15) - w
.y
;
219 ipt
.y
= base
+ (ipt
.y
* w
.y
>> 15);
220 ipt
.z
= base
+ (ipt
.z
* w
.y
>> 15);
/*
 * Fill the LUT tables of `lut3d` for the given color mapping.
 *
 * Returns AVERROR(EINVAL) unless both fmt_in and fmt_out pass
 * ff_sws_lut3d_test_fmt(). The LUT is marked dynamic when
 * map->src.frame_peak.num is positive; in that case both the input and the
 * output tables are generated and ff_sws_lut3d_update() is called once with
 * map->src. The visible lines also contain a call that generates only the
 * input table via ff_sws_color_map_generate_static() and returns its result.
 *
 * NOTE(review): this extract is incomplete - the declaration of `ret`, the
 * handling of its value after the dynamic call, and the branch structure
 * around the static call (original lines 226-228, 233-234, 240-242, 245-246,
 * 249+) are not visible. Confirm against the real source before editing.
 */
224 int ff_sws_lut3d_generate(SwsLut3D
*lut3d
, enum AVPixelFormat fmt_in
,
225 enum AVPixelFormat fmt_out
, const SwsColorMap
*map
)
/* Reject any format pair the LUT code cannot handle */
229 if (!ff_sws_lut3d_test_fmt(fmt_in
, 0) || !ff_sws_lut3d_test_fmt(fmt_out
, 1))
230 return AVERROR(EINVAL
);
/* A positive frame peak numerator selects the dynamic code path */
232 lut3d
->dynamic
= map
->src
.frame_peak
.num
> 0;
235 if (lut3d
->dynamic
) {
236 ret
= ff_sws_color_map_generate_dynamic(&lut3d
->input
[0][0][0],
237 &lut3d
->output
[0][0][0],
238 INPUT_LUT_SIZE
, OUTPUT_LUT_SIZE_I
,
239 OUTPUT_LUT_SIZE_PT
, map
);
243 /* Make sure initial state is valid */
244 ff_sws_lut3d_update(lut3d
, &map
->src
);
247 return ff_sws_color_map_generate_static(&lut3d
->input
[0][0][0],
248 INPUT_LUT_SIZE
, map
);
252 void ff_sws_lut3d_update(SwsLut3D
*lut3d
, const SwsColor
*new_src
)
254 if (!new_src
|| !lut3d
->dynamic
)
257 lut3d
->map
.src
.frame_peak
= new_src
->frame_peak
;
258 lut3d
->map
.src
.frame_avg
= new_src
->frame_avg
;
260 ff_sws_tone_map_generate(lut3d
->tone_map
, TONE_LUT_SIZE
, &lut3d
->map
);
263 void ff_sws_lut3d_apply(const SwsLut3D
*lut3d
, const uint8_t *in
, int in_stride
,
264 uint8_t *out
, int out_stride
, int w
, int h
)
267 const uint16_t *in16
= (const uint16_t *) in
;
268 uint16_t *out16
= (uint16_t *) out
;
270 for (int x
= 0; x
< w
; x
++) {
271 v3u16_t c
= { in16
[0], in16
[1], in16
[2] };
272 c
= lookup_input16(lut3d
, c
);
274 if (lut3d
->dynamic
) {
275 c
= apply_tone_map(lut3d
, c
);
276 c
= lookup_output(lut3d
, c
);