libswscale/lut3d.c

   1 /*
   2  * Copyright (C) 2024 Niklas Haas
   3  *
   4  * This file is part of FFmpeg.
   5  *
   6  * FFmpeg is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2.1 of the License, or (at your option) any later version.
  10  *
  11  * FFmpeg is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with FFmpeg; if not, write to the Free Software
  18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19  */
  20
  21 #include <assert.h>
  22 #include <string.h>
  23
  24 #include "libavutil/attributes.h"
  25 #include "libavutil/avassert.h"
  26 #include "libavutil/mem.h"
  27
  28 #include "cms.h"
  29 #include "csputils.h"
  30 #include "lut3d.h"
  31
  32 SwsLut3D *ff_sws_lut3d_alloc(void)
  33 {
  34     SwsLut3D *lut3d = av_malloc(sizeof(*lut3d));
  35     if (!lut3d)
  36         return NULL;
  37
  38     lut3d->dynamic = false;
  39     return lut3d;
  40 }
  41
  42 void ff_sws_lut3d_free(SwsLut3D **plut3d)
  43 {
  44     av_freep(plut3d);
  45 }
  46
  47 bool ff_sws_lut3d_test_fmt(enum AVPixelFormat fmt, int output)
  48 {
  49     return fmt == AV_PIX_FMT_RGBA64;
  50 }
  51
  52 enum AVPixelFormat ff_sws_lut3d_pick_pixfmt(SwsFormat fmt, int output)
  53 {
  54     return AV_PIX_FMT_RGBA64;
  55 }
  56
  57 /**
  58  * v0 and v1 are 'black' and 'white'
  59  * v2 and v3 are closest RGB/CMY vertices
  60  * x >= y >= z are relative weights
  61  */
  62 static av_always_inline
  63 v3u16_t barycentric(int shift, int x, int y, int z,
  64                     v3u16_t v0, v3u16_t v1, v3u16_t v2, v3u16_t v3)
  65 {
  66     const int a = (1 << shift) - x;
  67     const int b = x - y;
  68     const int c = y - z;
  69     const int d = z;
  70     av_assert2(x >= y);
  71     av_assert2(y >= z);
  72
  73     return (v3u16_t) {
  74         (a * v0.x + b * v1.x + c * v2.x + d * v3.x) >> shift,
  75         (a * v0.y + b * v1.y + c * v2.y + d * v3.y) >> shift,
  76         (a * v0.z + b * v1.z + c * v2.z + d * v3.z) >> shift,
  77     };
  78 }
  79
  80 static av_always_inline
  81 v3u16_t tetrahedral(const SwsLut3D *lut3d, int Rx, int Gx, int Bx,
  82                     int Rf, int Gf, int Bf)
  83 {
  84     const int shift = 16 - INPUT_LUT_BITS;
  85     const int Rn = FFMIN(Rx + 1, INPUT_LUT_SIZE - 1);
  86     const int Gn = FFMIN(Gx + 1, INPUT_LUT_SIZE - 1);
  87     const int Bn = FFMIN(Bx + 1, INPUT_LUT_SIZE - 1);
  88
  89     const v3u16_t c000 = lut3d->input[Bx][Gx][Rx];
  90     const v3u16_t c111 = lut3d->input[Bn][Gn][Rn];
  91     if (Rf > Gf) {
  92         if (Gf > Bf) {
  93             const v3u16_t c100 = lut3d->input[Bx][Gx][Rn];
  94             const v3u16_t c110 = lut3d->input[Bx][Gn][Rn];
  95             return barycentric(shift, Rf, Gf, Bf, c000, c100, c110, c111);
  96         } else if (Rf > Bf) {
  97             const v3u16_t c100 = lut3d->input[Bx][Gx][Rn];
  98             const v3u16_t c101 = lut3d->input[Bn][Gx][Rn];
  99             return barycentric(shift, Rf, Bf, Gf, c000, c100, c101, c111);
 100         } else {
 101             const v3u16_t c001 = lut3d->input[Bn][Gx][Rx];
 102             const v3u16_t c101 = lut3d->input[Bn][Gx][Rn];
 103             return barycentric(shift, Bf, Rf, Gf, c000, c001, c101, c111);
 104         }
 105     } else {
 106         if (Bf > Gf) {
 107             const v3u16_t c001 = lut3d->input[Bn][Gx][Rx];
 108             const v3u16_t c011 = lut3d->input[Bn][Gn][Rx];
 109             return barycentric(shift, Bf, Gf, Rf, c000, c001, c011, c111);
 110         } else if (Bf > Rf) {
 111             const v3u16_t c010 = lut3d->input[Bx][Gn][Rx];
 112             const v3u16_t c011 = lut3d->input[Bn][Gn][Rx];
 113             return barycentric(shift, Gf, Bf, Rf, c000, c010, c011, c111);
 114         } else {
 115             const v3u16_t c010 = lut3d->input[Bx][Gn][Rx];
 116             const v3u16_t c110 = lut3d->input[Bx][Gn][Rn];
 117             return barycentric(shift, Gf, Rf, Bf, c000, c010, c110, c111);
 118         }
 119     }
 120 }
 121
 122 static av_always_inline v3u16_t lookup_input16(const SwsLut3D *lut3d, v3u16_t rgb)
 123 {
 124     const int shift = 16 - INPUT_LUT_BITS;
 125     const int Rx = rgb.x >> shift;
 126     const int Gx = rgb.y >> shift;
 127     const int Bx = rgb.z >> shift;
 128     const int Rf = rgb.x & ((1 << shift) - 1);
 129     const int Gf = rgb.y & ((1 << shift) - 1);
 130     const int Bf = rgb.z & ((1 << shift) - 1);
 131     return tetrahedral(lut3d, Rx, Gx, Bx, Rf, Gf, Bf);
 132 }
 133
 134 static av_always_inline v3u16_t lookup_input8(const SwsLut3D *lut3d, v3u8_t rgb)
 135 {
 136     static_assert(INPUT_LUT_BITS <= 8, "INPUT_LUT_BITS must be <= 8");
 137     const int shift = 8 - INPUT_LUT_BITS;
 138     const int Rx = rgb.x >> shift;
 139     const int Gx = rgb.y >> shift;
 140     const int Bx = rgb.z >> shift;
 141     const int Rf = rgb.x & ((1 << shift) - 1);
 142     const int Gf = rgb.y & ((1 << shift) - 1);
 143     const int Bf = rgb.z & ((1 << shift) - 1);
 144     return tetrahedral(lut3d, Rx, Gx, Bx, Rf, Gf, Bf);
 145 }
 146
/**
 * Note: These functions are scaled such that x == (1 << shift) corresponds to
 * a value of 1.0. This makes them suitable for use when interpolating LUT
 * entries with a fractional part that is just masked away from the index,
 * since a fractional coordinate of e.g. 0xFFFF corresponds to a mix weight of
 * just slightly *less* than 1.0.
 */
 154 static av_always_inline v2u16_t lerp2u16(v2u16_t a, v2u16_t b, int x, int shift)
 155 {
 156     const int xi = (1 << shift) - x;
 157     return (v2u16_t) {
 158         (a.x * xi + b.x * x) >> shift,
 159         (a.y * xi + b.y * x) >> shift,
 160     };
 161 }
 162
 163 static av_always_inline v3u16_t lerp3u16(v3u16_t a, v3u16_t b, int x, int shift)
 164 {
 165     const int xi = (1 << shift) - x;
 166     return (v3u16_t) {
 167         (a.x * xi + b.x * x) >> shift,
 168         (a.y * xi + b.y * x) >> shift,
 169         (a.z * xi + b.z * x) >> shift,
 170     };
 171 }
 172
 173 static av_always_inline v3u16_t lookup_output(const SwsLut3D *lut3d, v3u16_t ipt)
 174 {
 175     const int Ishift = 16 - OUTPUT_LUT_BITS_I;
 176     const int Cshift = 16 - OUTPUT_LUT_BITS_PT;
 177     const int Ix = ipt.x >> Ishift;
 178     const int Px = ipt.y >> Cshift;
 179     const int Tx = ipt.z >> Cshift;
 180     const int If = ipt.x & ((1 << Ishift) - 1);
 181     const int Pf = ipt.y & ((1 << Cshift) - 1);
 182     const int Tf = ipt.z & ((1 << Cshift) - 1);
 183     const int In = FFMIN(Ix + 1, OUTPUT_LUT_SIZE_I  - 1);
 184     const int Pn = FFMIN(Px + 1, OUTPUT_LUT_SIZE_PT - 1);
 185     const int Tn = FFMIN(Tx + 1, OUTPUT_LUT_SIZE_PT - 1);
 186
 187     /* Trilinear interpolation */
 188     const v3u16_t c000 = lut3d->output[Tx][Px][Ix];
 189     const v3u16_t c001 = lut3d->output[Tx][Px][In];
 190     const v3u16_t c010 = lut3d->output[Tx][Pn][Ix];
 191     const v3u16_t c011 = lut3d->output[Tx][Pn][In];
 192     const v3u16_t c100 = lut3d->output[Tn][Px][Ix];
 193     const v3u16_t c101 = lut3d->output[Tn][Px][In];
 194     const v3u16_t c110 = lut3d->output[Tn][Pn][Ix];
 195     const v3u16_t c111 = lut3d->output[Tn][Pn][In];
 196     const v3u16_t c00  = lerp3u16(c000, c100, Tf, Cshift);
 197     const v3u16_t c10  = lerp3u16(c010, c110, Tf, Cshift);
 198     const v3u16_t c01  = lerp3u16(c001, c101, Tf, Cshift);
 199     const v3u16_t c11  = lerp3u16(c011, c111, Tf, Cshift);
 200     const v3u16_t c0   = lerp3u16(c00,  c10,  Pf, Cshift);
 201     const v3u16_t c1   = lerp3u16(c01,  c11,  Pf, Cshift);
 202     const v3u16_t c    = lerp3u16(c0,   c1,   If, Ishift);
 203     return c;
 204 }
 205
 206 static av_always_inline v3u16_t apply_tone_map(const SwsLut3D *lut3d, v3u16_t ipt)
 207 {
 208     const int shift = 16 - TONE_LUT_BITS;
 209     const int Ix = ipt.x >> shift;
 210     const int If = ipt.x & ((1 << shift) - 1);
 211     const int In = FFMIN(Ix + 1, TONE_LUT_SIZE - 1);
 212
 213     const v2u16_t w0 = lut3d->tone_map[Ix];
 214     const v2u16_t w1 = lut3d->tone_map[In];
 215     const v2u16_t w  = lerp2u16(w0, w1, If, shift);
 216     const int base   = (1 << 15) - w.y;
 217
 218     ipt.x = w.x;
 219     ipt.y = base + (ipt.y * w.y >> 15);
 220     ipt.z = base + (ipt.z * w.y >> 15);
 221     return ipt;
 222 }
 223
 224 int ff_sws_lut3d_generate(SwsLut3D *lut3d, enum AVPixelFormat fmt_in,
 225                           enum AVPixelFormat fmt_out, const SwsColorMap *map)
 226 {
 227     int ret;
 228
 229     if (!ff_sws_lut3d_test_fmt(fmt_in, 0) || !ff_sws_lut3d_test_fmt(fmt_out, 1))
 230         return AVERROR(EINVAL);
 231
 232     lut3d->dynamic = map->src.frame_peak.num > 0;
 233     lut3d->map = *map;
 234
 235     if (lut3d->dynamic) {
 236         ret = ff_sws_color_map_generate_dynamic(&lut3d->input[0][0][0],
 237                                              &lut3d->output[0][0][0],
 238                                              INPUT_LUT_SIZE, OUTPUT_LUT_SIZE_I,
 239                                              OUTPUT_LUT_SIZE_PT, map);
 240         if (ret < 0)
 241             return ret;
 242
 243         /* Make sure initial state is valid */
 244         ff_sws_lut3d_update(lut3d, &map->src);
 245         return 0;
 246     } else {
 247         return ff_sws_color_map_generate_static(&lut3d->input[0][0][0],
 248                                              INPUT_LUT_SIZE, map);
 249     }
 250 }
 251
 252 void ff_sws_lut3d_update(SwsLut3D *lut3d, const SwsColor *new_src)
 253 {
 254     if (!new_src || !lut3d->dynamic)
 255         return;
 256
 257     lut3d->map.src.frame_peak = new_src->frame_peak;
 258     lut3d->map.src.frame_avg  = new_src->frame_avg;
 259
 260     ff_sws_tone_map_generate(lut3d->tone_map, TONE_LUT_SIZE, &lut3d->map);
 261 }
 262
 263 void ff_sws_lut3d_apply(const SwsLut3D *lut3d, const uint8_t *in, int in_stride,
 264                         uint8_t *out, int out_stride, int w, int h)
 265 {
 266     while (h--) {
 267         const uint16_t *in16 = (const uint16_t *) in;
 268         uint16_t *out16 = (uint16_t *) out;
 269
 270         for (int x = 0; x < w; x++) {
 271             v3u16_t c = { in16[0], in16[1], in16[2] };
 272             c = lookup_input16(lut3d, c);
 273
 274             if (lut3d->dynamic) {
 275                 c = apply_tone_map(lut3d, c);
 276                 c = lookup_output(lut3d, c);
 277             }
 278
 279             out16[0] = c.x;
 280             out16[1] = c.y;
 281             out16[2] = c.z;
 282             out16[3] = in16[3];
 283             in16  += 4;
 284             out16 += 4;
 285         }
 286
 287         in  += in_stride;
 288         out += out_stride;
 289     }
 290 }