libswscale/swscale_internal.h

   1 /*
   2  * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at>
   3  *
   4  * This file is part of FFmpeg.
   5  *
   6  * FFmpeg is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2.1 of the License, or (at your option) any later version.
  10  *
  11  * FFmpeg is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with FFmpeg; if not, write to the Free Software
  18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19  */
  20
  21 #ifndef SWSCALE_SWSCALE_INTERNAL_H
  22 #define SWSCALE_SWSCALE_INTERNAL_H
  23
  24 #include <stdatomic.h>
  25 #include <assert.h>
  26
  27 #include "config.h"
  28 #include "swscale.h"
  29 #include "graph.h"
  30
  31 #include "libavutil/avassert.h"
  32 #include "libavutil/common.h"
  33 #include "libavutil/frame.h"
  34 #include "libavutil/intreadwrite.h"
  35 #include "libavutil/log.h"
  36 #include "libavutil/mem_internal.h"
  37 #include "libavutil/pixfmt.h"
  38 #include "libavutil/pixdesc.h"
  39 #include "libavutil/slicethread.h"
  40 #if HAVE_ALTIVEC
  41 #include "libavutil/ppc/util_altivec.h"
  42 #endif
  43 #include "libavutil/half2float.h"
  44
  45 #define STR(s) AV_TOSTRING(s) // AV_STRINGIFY is too long
  46
     /* The yuv->rgb lookup tables in SwsInternal (table_rV, table_gU, table_gV,
      * table_bU) are declared with 256 + 2*YUVRGB_TABLE_HEADROOM entries.
      * NOTE(review): users of the LUMA variant are not visible in this part of
      * the header. */
  47 #define YUVRGB_TABLE_HEADROOM 512
  48 #define YUVRGB_TABLE_LUMA_HEADROOM 512
  49
     /* Alias of SWS_MAX_FILTER_SIZE. It sizes the lum/chr/alpMmxFilter arrays in
      * SwsInternal and is stringified into the asm offset macros defined there. */
  50 #define MAX_FILTER_SIZE SWS_MAX_FILTER_SIZE
  51
  52 #define SWS_MAX_THREADS 8192 /* sanity clamp */
  53
     /* -1 on big-endian builds, 1 otherwise.
      * NOTE(review): the users of this correction are outside this header. */
  54 #if HAVE_BIGENDIAN
  55 #define ALT32_CORR (-1)
  56 #else
  57 #define ALT32_CORR   1
  58 #endif
  59
     /* Pointer-size dependent constants: 8/16/24 on x86-64, 4/8/16 elsewhere.
      * NOTE(review): presumably byte offsets (second pointer, coefficients) and
      * the total size of one packed filter entry used by asm code; confirm
      * against the users before changing. */
  60 #if ARCH_X86_64
  61 #   define APCK_PTR2  8
  62 #   define APCK_COEF 16
  63 #   define APCK_SIZE 24
  64 #else
  65 #   define APCK_PTR2  4
  66 #   define APCK_COEF  8
  67 #   define APCK_SIZE 16
  68 #endif
  69
     /* NOTE(review): magic return code; presumably signals that the caller should
      * fall back to the cascaded contexts (see cascaded_context in SwsInternal).
      * Confirm at the call sites. */
  70 #define RETCODE_USE_CASCADE -12345
  71
     /* Forward typedef; the struct itself is defined further down in this header. */
  72 typedef struct SwsInternal SwsInternal;
  73
     /**
      * Convert a public SwsContext pointer into the SwsInternal that contains it.
      *
      * This is a plain pointer cast: struct SwsInternal starts with its
      * SwsContext member (opts), so both objects share the same address.
      * Note that the cast also drops the const qualifier of the argument.
      *
      * NOTE(review): assumes sws really is the opts member of a SwsInternal;
      * nothing here checks that.
      */
  74 static inline SwsInternal *sws_internal(const SwsContext *sws)
  75 {
  76     return (SwsInternal *) sws;
  77 }
  78
     /* A span described by its first position and its length; element type of
      * RangeList. */
  79 typedef struct Range {
  80     unsigned int start;
  81     unsigned int len;
  82 } Range;
  83
     /* Array of Range entries plus bookkeeping; SwsInternal.src_ranges has this
      * type. */
  84 typedef struct RangeList {
  85     Range          *ranges;
  86     unsigned int nb_ranges;          // presumably the number of valid entries in ranges
  87     int             ranges_allocated; // NOTE(review): allocation bookkeeping; unit (bytes/entries) not visible here
  88 } RangeList;
  89
     /* Add the span (start, len) to the list r.
      * NOTE(review): return convention and any merging of adjacent spans are
      * defined by the implementation, which is not visible here. */
  90 int ff_range_add(RangeList *r, unsigned int start, unsigned int len);
  91
     /**
      * Signature of a slice conversion function; SwsInternal.convert_unscaled
      * has this type.
      *
      * The parameter names describe per-plane source pointers and strides, the
      * first line (srcSliceY) and height (srcSliceH) of the source slice, and
      * per-plane destination pointers and strides.
      * NOTE(review): the meaning of the int return value is not visible here.
      */
  92 typedef int (*SwsFunc)(SwsInternal *c, const uint8_t *const src[],
  93                        const int srcStride[], int srcSliceY, int srcSliceH,
  94                        uint8_t *const dst[], const int dstStride[]);
  95
  96 /**
  97  * Write one line of horizontally scaled data to planar output
  98  * without any additional vertical scaling (or point-scaling).
  99  *
 100  * @param src     scaled source data, 15 bits for 8-10-bit output,
 101  *                19 bits for 16-bit output (in int32_t)
 102  * @param dest    pointer to the output plane. For >8-bit
 103  *                output, this is in uint16_t
 104  * @param dstW    width of destination in pixels
 105  * @param dither  ordered dither array of type uint8_t and size 8
 106  * @param offset  Dither offset
 107  */
 108 typedef void (*yuv2planar1_fn)(const int16_t *src, uint8_t *dest, int dstW,
 109                                const uint8_t *dither, int offset);
 110
 111 /**
 112  * Write one line of horizontally scaled data to planar output
 113  * with multi-point vertical scaling between input pixels.
 114  *
 115  * @param filter        vertical luma/alpha scaling coefficients, 12 bits [0,4096]
 116  * @param src           scaled luma (Y) or alpha (A) source data, 15 bits for
 117  *                      8-10-bit output, 19 bits for 16-bit output (in int32_t)
 118  * @param filterSize    number of vertical input lines to scale
 119  * @param dest          pointer to output plane. For >8-bit
 120  *                      output, this is in uint16_t
 121  * @param dstW          width of destination in pixels
      * @param dither        ordered dither array of type uint8_t
 122  * @param offset        Dither offset
 123  */
 124 typedef void (*yuv2planarX_fn)(const int16_t *filter, int filterSize,
 125                                const int16_t **src, uint8_t *dest, int dstW,
 126                                const uint8_t *dither, int offset);
 127
 128 /**
 129  * Write one line of horizontally scaled chroma to interleaved output
 130  * with multi-point vertical scaling between input pixels.
      *
      * This is the type of SwsInternal.yuv2nv12cX.
 131  *
 132  * @param dstFormat     destination pixel format
 133  * @param chrDither     ordered dither array of type uint8_t and size 8
 134  * @param chrFilter     vertical chroma scaling coefficients, 12 bits [0,4096]
 135  * @param chrUSrc       scaled chroma (U) source data, 15 bits for 8-10-bit
 136  *                      output, 19 bits for 16-bit output (in int32_t)
 137  * @param chrVSrc       scaled chroma (V) source data, 15 bits for 8-10-bit
 138  *                      output, 19 bits for 16-bit output (in int32_t)
 139  * @param chrFilterSize number of vertical chroma input lines to scale
 140  * @param dest          pointer to the output plane. For >8-bit
 141  *                      output, this is in uint16_t
 142  * @param dstW          width of chroma planes
 143  */
 144 typedef void (*yuv2interleavedX_fn)(enum AVPixelFormat dstFormat,
 145                                     const uint8_t *chrDither,
 146                                     const int16_t *chrFilter,
 147                                     int chrFilterSize,
 148                                     const int16_t **chrUSrc,
 149                                     const int16_t **chrVSrc,
 150                                     uint8_t *dest, int dstW);
 151
 152 /**
 153  * Write one line of horizontally scaled Y/U/V/A to packed-pixel YUV/RGB
 154  * output without any additional vertical scaling (or point-scaling). Note
 155  * that this function may do chroma scaling, see the "uvalpha" argument.
 156  *
 157  * @param c       SWS scaling context
 158  * @param lumSrc  scaled luma (Y) source data, 15 bits for 8-10-bit output,
 159  *                19 bits for 16-bit output (in int32_t)
 160  * @param chrUSrc scaled chroma (U) source data, 15 bits for 8-10-bit output,
 161  *                19 bits for 16-bit output (in int32_t)
      *                (array of two line pointers, see uvalpha)
 162  * @param chrVSrc scaled chroma (V) source data, 15 bits for 8-10-bit output,
 163  *                19 bits for 16-bit output (in int32_t)
      *                (array of two line pointers, see uvalpha)
 164  * @param alpSrc  scaled alpha (A) source data, 15 bits for 8-10-bit output,
 165  *                19 bits for 16-bit output (in int32_t)
 166  * @param dest    pointer to the output plane. For 16-bit output, this is
 167  *                uint16_t
 168  * @param dstW    width of lumSrc and alpSrc in pixels, number of pixels
 169  *                to write into dest[]
 170  * @param uvalpha chroma scaling coefficient for the second line of chroma
 171  *                pixels, either 2048 or 0. If 0, one chroma input is used
 172  *                for 2 output pixels (or if the SWS_FLAG_FULL_CHR_INT flag
 173  *                is set, it generates 1 output pixel). If 2048, two chroma
 174  *                input pixels should be averaged for 2 output pixels (this
 175  *                only happens if SWS_FLAG_FULL_CHR_INT is not set)
 176  * @param y       vertical line number for this output. This does not need
 177  *                to be used to calculate the offset in the destination,
 178  *                but can be used to generate comfort noise using dithering
 179  *                for some output formats.
 180  */
 181 typedef void (*yuv2packed1_fn)(SwsInternal *c, const int16_t *lumSrc,
 182                                const int16_t *chrUSrc[2],
 183                                const int16_t *chrVSrc[2],
 184                                const int16_t *alpSrc, uint8_t *dest,
 185                                int dstW, int uvalpha, int y);
 186 /**
 187  * Write one line of horizontally scaled Y/U/V/A to packed-pixel YUV/RGB
 188  * output by doing bilinear scaling between two input lines.
      *
      * Every *Src argument is an array of two line pointers, one per input line.
 189  *
 190  * @param c       SWS scaling context
 191  * @param lumSrc  scaled luma (Y) source data, 15 bits for 8-10-bit output,
 192  *                19 bits for 16-bit output (in int32_t)
 193  * @param chrUSrc scaled chroma (U) source data, 15 bits for 8-10-bit output,
 194  *                19 bits for 16-bit output (in int32_t)
 195  * @param chrVSrc scaled chroma (V) source data, 15 bits for 8-10-bit output,
 196  *                19 bits for 16-bit output (in int32_t)
 197  * @param alpSrc  scaled alpha (A) source data, 15 bits for 8-10-bit output,
 198  *                19 bits for 16-bit output (in int32_t)
 199  * @param dest    pointer to the output plane. For 16-bit output, this is
 200  *                uint16_t
 201  * @param dstW    width of lumSrc and alpSrc in pixels, number of pixels
 202  *                to write into dest[]
 203  * @param yalpha  luma/alpha scaling coefficients for the second input line.
 204  *                The first line's coefficients can be calculated by using
 205  *                4096 - yalpha
 206  * @param uvalpha chroma scaling coefficient for the second input line. The
 207  *                first line's coefficients can be calculated by using
 208  *                4096 - uvalpha
 209  * @param y       vertical line number for this output. This does not need
 210  *                to be used to calculate the offset in the destination,
 211  *                but can be used to generate comfort noise using dithering
 212  *                for some output formats.
 213  */
 214 typedef void (*yuv2packed2_fn)(SwsInternal *c, const int16_t *lumSrc[2],
 215                                const int16_t *chrUSrc[2],
 216                                const int16_t *chrVSrc[2],
 217                                const int16_t *alpSrc[2],
 218                                uint8_t *dest,
 219                                int dstW, int yalpha, int uvalpha, int y);
 220 /**
 221  * Write one line of horizontally scaled Y/U/V/A to packed-pixel YUV/RGB
 222  * output by doing multi-point vertical scaling between input pixels.
 223  *
 224  * @param c             SWS scaling context
 225  * @param lumFilter     vertical luma/alpha scaling coefficients, 12 bits [0,4096]
 226  * @param lumSrc        scaled luma (Y) source data, 15 bits for 8-10-bit output,
 227  *                      19 bits for 16-bit output (in int32_t)
 228  * @param lumFilterSize number of vertical luma/alpha input lines to scale
 229  * @param chrFilter     vertical chroma scaling coefficients, 12 bits [0,4096]
 230  * @param chrUSrc       scaled chroma (U) source data, 15 bits for 8-10-bit output,
 231  *                      19 bits for 16-bit output (in int32_t)
 232  * @param chrVSrc       scaled chroma (V) source data, 15 bits for 8-10-bit output,
 233  *                      19 bits for 16-bit output (in int32_t)
 234  * @param chrFilterSize number of vertical chroma input lines to scale
 235  * @param alpSrc        scaled alpha (A) source data, 15 bits for 8-10-bit output,
 236  *                      19 bits for 16-bit output (in int32_t)
 237  * @param dest          pointer to the output plane. For 16-bit output, this is
 238  *                      uint16_t
 239  * @param dstW          width of lumSrc and alpSrc in pixels, number of pixels
 240  *                      to write into dest[]
 241  * @param y             vertical line number for this output. This does not need
 242  *                      to be used to calculate the offset in the destination,
 243  *                      but can be used to generate comfort noise using dithering
 244  *                      for some output formats.
 245  */
 246 typedef void (*yuv2packedX_fn)(SwsInternal *c, const int16_t *lumFilter,
 247                                const int16_t **lumSrc, int lumFilterSize,
 248                                const int16_t *chrFilter,
 249                                const int16_t **chrUSrc,
 250                                const int16_t **chrVSrc, int chrFilterSize,
 251                                const int16_t **alpSrc, uint8_t *dest,
 252                                int dstW, int y);
 253
 254 /**
 255  * Write one line of horizontally scaled Y/U/V/A to YUV/RGB
 256  * output by doing multi-point vertical scaling between input pixels.
      *
      * Unlike yuv2packedX_fn, dest is an array of plane pointers (uint8_t **).
 257  *
 258  * @param c             SWS scaling context
 259  * @param lumFilter     vertical luma/alpha scaling coefficients, 12 bits [0,4096]
 260  * @param lumSrc        scaled luma (Y) source data, 15 bits for 8-10-bit output,
 261  *                      19 bits for 16-bit output (in int32_t)
 262  * @param lumFilterSize number of vertical luma/alpha input lines to scale
 263  * @param chrFilter     vertical chroma scaling coefficients, 12 bits [0,4096]
 264  * @param chrUSrc       scaled chroma (U) source data, 15 bits for 8-10-bit output,
 265  *                      19 bits for 16-bit output (in int32_t)
 266  * @param chrVSrc       scaled chroma (V) source data, 15 bits for 8-10-bit output,
 267  *                      19 bits for 16-bit output (in int32_t)
 268  * @param chrFilterSize number of vertical chroma input lines to scale
 269  * @param alpSrc        scaled alpha (A) source data, 15 bits for 8-10-bit output,
 270  *                      19 bits for 16-bit output (in int32_t)
 271  * @param dest          pointer to the output planes. For 16-bit output, this is
 272  *                      uint16_t
 273  * @param dstW          width of lumSrc and alpSrc in pixels, number of pixels
 274  *                      to write into dest[]
 275  * @param y             vertical line number for this output. This does not need
 276  *                      to be used to calculate the offset in the destination,
 277  *                      but can be used to generate comfort noise using dithering
 278  *                      for some output formats.
 279  */
 280 typedef void (*yuv2anyX_fn)(SwsInternal *c, const int16_t *lumFilter,
 281                             const int16_t **lumSrc, int lumFilterSize,
 282                             const int16_t *chrFilter,
 283                             const int16_t **chrUSrc,
 284                             const int16_t **chrVSrc, int chrFilterSize,
 285                             const int16_t **alpSrc, uint8_t **dest,
 286                             int dstW, int y);
 287
 288 /**
 289  * Unscaled conversion of luma/alpha plane to YV12 for horizontal scaler.
      *
      * This is the type of SwsInternal.lumToYV12 and SwsInternal.alpToYV12.
      * It takes up to three source pointers and writes a single output line.
      * NOTE(review): opaque is presumably SwsInternal.input_opaque, and pal a
      * palette (cf. pal_yuv/pal_rgb); confirm at the call sites.
 290  */
 291 typedef void (*planar1_YV12_fn)(uint8_t *dst, const uint8_t *src, const uint8_t *src2,
 292                                 const uint8_t *src3, int width, uint32_t *pal,
 293                                 void *opaque);
 294
 295 /**
 296  * Unscaled conversion of chroma plane to YV12 for horizontal scaler.
      *
      * This is the type of SwsInternal.chrToYV12. Same inputs as
      * planar1_YV12_fn, but with two output lines (dst and dst2).
      * NOTE(review): dst/dst2 are presumably the U and V lines; confirm.
 297  */
 298 typedef void (*planar2_YV12_fn)(uint8_t *dst, uint8_t *dst2, const uint8_t *src,
 299                                 const uint8_t *src2, const uint8_t *src3,
 300                                 int width, uint32_t *pal, void *opaque);
 301
 302 /**
 303  * Unscaled conversion of arbitrary planar data (e.g. RGBA) to YV12, through
 304  * conversion using the given color matrix.
      *
      * This is the type of SwsInternal.readLumPlanar and
      * SwsInternal.readAlpPlanar. src holds four plane pointers and rgb2yuv is
      * the color matrix.
      * NOTE(review): rgb2yuv is presumably SwsInternal.input_rgb2yuv_table;
      * confirm at the call sites.
 305  */
 306 typedef void (*planarX_YV12_fn)(uint8_t *dst, const uint8_t *src[4], int width,
 307                                 int32_t *rgb2yuv, void *opaque);
 308
     /**
      * Two-output variant of the planar reader: same inputs (four source plane
      * pointers, width, color matrix, opaque) but writes dst and dst2.
      * This is the type of SwsInternal.readChrPlanar.
      */
 309 typedef void (*planarX2_YV12_fn)(uint8_t *dst, uint8_t *dst2,
 310                                  const uint8_t *src[4], int width,
 311                                  int32_t *rgb2yuv, void *opaque);
 312
     /* Forward declarations: SwsInternal refers to these through pointers. */
 313 struct SwsSlice;            // see SwsInternal.slice
 314 struct SwsFilterDescriptor; // see SwsInternal.desc
 315
 316 /* This struct should be aligned on at least a 32-byte boundary. */
 317 struct SwsInternal {
 318     /* Currently active user-facing options. Also contains AVClass */
 319     SwsContext opts;
 320
 321     /* Parent context (for slice contexts) */
 322     SwsContext *parent;
 323
 324     AVSliceThread      *slicethread;
 325     SwsContext        **slice_ctx;
 326     int                *slice_err;
 327     int              nb_slice_ctx;
 328
 329     /* Scaling graph, reinitialized dynamically as needed. */
 330     SwsGraph *graph[2]; /* top, bottom fields */
 331
 332     // values passed to current sws_receive_slice() call
 333     int dst_slice_start;
 334     int dst_slice_height;
 335
 336     /**
 337      * Note that src, dst, srcStride, dstStride will be copied in the
 338      * sws_scale() wrapper so they can be freely modified here.
 339      */
 340     SwsFunc convert_unscaled;
 341     int chrSrcW;                  ///< Width  of source      chroma     planes.
 342     int chrSrcH;                  ///< Height of source      chroma     planes.
 343     int chrDstW;                  ///< Width  of destination chroma     planes.
 344     int chrDstH;                  ///< Height of destination chroma     planes.
 345     int lumXInc, chrXInc;
 346     int lumYInc, chrYInc;
 347     int dstFormatBpp;             ///< Number of bits per pixel of the destination pixel format.
 348     int srcFormatBpp;             ///< Number of bits per pixel of the source      pixel format.
 349     int dstBpc, srcBpc;
 350     int chrSrcHSubSample;         ///< Binary logarithm of horizontal subsampling factor between luma/alpha and chroma planes in source      image.
 351     int chrSrcVSubSample;         ///< Binary logarithm of vertical   subsampling factor between luma/alpha and chroma planes in source      image.
 352     int chrDstHSubSample;         ///< Binary logarithm of horizontal subsampling factor between luma/alpha and chroma planes in destination image.
 353     int chrDstVSubSample;         ///< Binary logarithm of vertical   subsampling factor between luma/alpha and chroma planes in destination image.
 354     int vChrDrop;                 ///< Binary logarithm of extra vertical subsampling factor in source image chroma planes specified by user.
 355     int sliceDir;                 ///< Direction that slices are fed to the scaler (1 = top-to-bottom, -1 = bottom-to-top).
 356
 357     AVFrame *frame_src;
 358     AVFrame *frame_dst;
 359
 360     RangeList src_ranges;
 361
 362     /* The cascaded_* fields allow splitting a scaler task into multiple
 363      * sequential steps, this is for example used to limit the maximum
 364      * downscaling factor that needs to be supported in one scaler.
 365      */
 366     SwsContext *cascaded_context[3];
 367     int cascaded_tmpStride[2][4];
 368     uint8_t *cascaded_tmp[2][4];
 369     int cascaded_mainindex;
 370
 371     double gamma_value;
 372     int is_internal_gamma;
 373     uint16_t *gamma;
 374     uint16_t *inv_gamma;
 375
 376     int numDesc;
 377     int descIndex[2];
 378     int numSlice;
 379     struct SwsSlice *slice;
 380     struct SwsFilterDescriptor *desc;
 381
 382     uint32_t pal_yuv[256];
 383     uint32_t pal_rgb[256];
 384
 385     float uint2float_lut[256];
 386
 387     /**
 388      * @name Scaled horizontal lines ring buffer.
 389      * The horizontal scaler keeps just enough scaled lines in a ring buffer
 390      * so they may be passed to the vertical scaler. The pointers to the
 391      * allocated buffers for each line are duplicated in sequence in the ring
 392      * buffer to simplify indexing and avoid wrapping around between lines
 393      * inside the vertical scaler code. The wrapping is done before the
 394      * vertical scaler is called.
 395      */
 396     //@{
 397     int lastInLumBuf;             ///< Last scaled horizontal luma/alpha line from source in the ring buffer.
 398     int lastInChrBuf;             ///< Last scaled horizontal chroma     line from source in the ring buffer.
 399     //@}
 400
 401     uint8_t *formatConvBuffer;
 402     int needAlpha;
 403
 404     /**
 405      * @name Horizontal and vertical filters.
 406      * To better understand the following fields, here is a pseudo-code of
 407      * their usage in filtering a horizontal line:
 408      * @code
 409      * for (i = 0; i < width; i++) {
 410      *     dst[i] = 0;
 411      *     for (j = 0; j < filterSize; j++)
 412      *         dst[i] += src[ filterPos[i] + j ] * filter[ filterSize * i + j ];
 413      *     dst[i] >>= FRAC_BITS; // The actual implementation is fixed-point.
 414      * }
 415      * @endcode
 416      */
 417     //@{
 418     int16_t *hLumFilter;          ///< Array of horizontal filter coefficients for luma/alpha planes.
 419     int16_t *hChrFilter;          ///< Array of horizontal filter coefficients for chroma     planes.
 420     int16_t *vLumFilter;          ///< Array of vertical   filter coefficients for luma/alpha planes.
 421     int16_t *vChrFilter;          ///< Array of vertical   filter coefficients for chroma     planes.
 422     int32_t *hLumFilterPos;       ///< Array of horizontal filter starting positions for each dst[i] for luma/alpha planes.
 423     int32_t *hChrFilterPos;       ///< Array of horizontal filter starting positions for each dst[i] for chroma     planes.
 424     int32_t *vLumFilterPos;       ///< Array of vertical   filter starting positions for each dst[i] for luma/alpha planes.
 425     int32_t *vChrFilterPos;       ///< Array of vertical   filter starting positions for each dst[i] for chroma     planes.
 426     int hLumFilterSize;           ///< Horizontal filter size for luma/alpha pixels.
 427     int hChrFilterSize;           ///< Horizontal filter size for chroma     pixels.
 428     int vLumFilterSize;           ///< Vertical   filter size for luma/alpha pixels.
 429     int vChrFilterSize;           ///< Vertical   filter size for chroma     pixels.
 430     //@}
 431
 432     int lumMmxextFilterCodeSize;  ///< Runtime-generated MMXEXT horizontal fast bilinear scaler code size for luma/alpha planes.
 433     int chrMmxextFilterCodeSize;  ///< Runtime-generated MMXEXT horizontal fast bilinear scaler code size for chroma planes.
 434     uint8_t *lumMmxextFilterCode; ///< Runtime-generated MMXEXT horizontal fast bilinear scaler code for luma/alpha planes.
 435     uint8_t *chrMmxextFilterCode; ///< Runtime-generated MMXEXT horizontal fast bilinear scaler code for chroma planes.
 436
 437     int canMMXEXTBeUsed;
 438     int warned_unuseable_bilinear;
 439
 440     int dstY;                     ///< Last destination vertical line output from last slice.
 441     void *yuvTable;             // pointer to the yuv->rgb table start so it can be freed()
 442     // alignment ensures the offset can be added in a single
 443     // instruction on e.g. ARM
 444     DECLARE_ALIGNED(16, int, table_gV)[256 + 2*YUVRGB_TABLE_HEADROOM];
 445     uint8_t *table_rV[256 + 2*YUVRGB_TABLE_HEADROOM];
 446     uint8_t *table_gU[256 + 2*YUVRGB_TABLE_HEADROOM];
 447     uint8_t *table_bU[256 + 2*YUVRGB_TABLE_HEADROOM];
 448     DECLARE_ALIGNED(16, int32_t, input_rgb2yuv_table)[16+40*4]; // This table can contain both C and SIMD formatted values, the C values are always at the XY_IDX points
 449 #define RY_IDX 0
 450 #define GY_IDX 1
 451 #define BY_IDX 2
 452 #define RU_IDX 3
 453 #define GU_IDX 4
 454 #define BU_IDX 5
 455 #define RV_IDX 6
 456 #define GV_IDX 7
 457 #define BV_IDX 8
 458 #define RGB2YUV_SHIFT 15
 459
 460     int *dither_error[4];
 461
 462     //Colorspace stuff
 463     int contrast, brightness, saturation;    // for sws_getColorspaceDetails
 464     int srcColorspaceTable[4];
 465     int dstColorspaceTable[4];
 466     int src0Alpha;
 467     int dst0Alpha;
 468     int srcXYZ;
 469     int dstXYZ;
 470     int yuv2rgb_y_offset;
 471     int yuv2rgb_y_coeff;
 472     int yuv2rgb_v2r_coeff;
 473     int yuv2rgb_v2g_coeff;
 474     int yuv2rgb_u2g_coeff;
 475     int yuv2rgb_u2b_coeff;
 476
 477 #define RED_DITHER            "0*8"
 478 #define GREEN_DITHER          "1*8"
 479 #define BLUE_DITHER           "2*8"
 480 #define Y_COEFF               "3*8"
 481 #define VR_COEFF              "4*8"
 482 #define UB_COEFF              "5*8"
 483 #define VG_COEFF              "6*8"
 484 #define UG_COEFF              "7*8"
 485 #define Y_OFFSET              "8*8"
 486 #define U_OFFSET              "9*8"
 487 #define V_OFFSET              "10*8"
 488 #define LUM_MMX_FILTER_OFFSET "11*8"
 489 #define CHR_MMX_FILTER_OFFSET "11*8+4*4*"AV_STRINGIFY(MAX_FILTER_SIZE)
 490 #define DSTW_OFFSET           "11*8+4*4*"AV_STRINGIFY(MAX_FILTER_SIZE)"*2"
 491 #define ESP_OFFSET            "11*8+4*4*"AV_STRINGIFY(MAX_FILTER_SIZE)"*2+8"
 492 #define VROUNDER_OFFSET       "11*8+4*4*"AV_STRINGIFY(MAX_FILTER_SIZE)"*2+16"
 493 #define U_TEMP                "11*8+4*4*"AV_STRINGIFY(MAX_FILTER_SIZE)"*2+24"
 494 #define V_TEMP                "11*8+4*4*"AV_STRINGIFY(MAX_FILTER_SIZE)"*2+32"
 495 #define Y_TEMP                "11*8+4*4*"AV_STRINGIFY(MAX_FILTER_SIZE)"*2+40"
 496 #define ALP_MMX_FILTER_OFFSET "11*8+4*4*"AV_STRINGIFY(MAX_FILTER_SIZE)"*2+48"
 497 #define UV_OFF_PX             "11*8+4*4*"AV_STRINGIFY(MAX_FILTER_SIZE)"*3+48"
 498 #define UV_OFF_BYTE           "11*8+4*4*"AV_STRINGIFY(MAX_FILTER_SIZE)"*3+56"
 499 #define DITHER16              "11*8+4*4*"AV_STRINGIFY(MAX_FILTER_SIZE)"*3+64"
 500 #define DITHER32              "11*8+4*4*"AV_STRINGIFY(MAX_FILTER_SIZE)"*3+80"
 501 #define DITHER32_INT          (11*8+4*4*MAX_FILTER_SIZE*3+80) // value equal to above, used for checking that the struct hasn't been changed by mistake
 502
 503     DECLARE_ALIGNED(8, uint64_t, redDither);
 504     DECLARE_ALIGNED(8, uint64_t, greenDither);
 505     DECLARE_ALIGNED(8, uint64_t, blueDither);
 506
 507     DECLARE_ALIGNED(8, uint64_t, yCoeff);
 508     DECLARE_ALIGNED(8, uint64_t, vrCoeff);
 509     DECLARE_ALIGNED(8, uint64_t, ubCoeff);
 510     DECLARE_ALIGNED(8, uint64_t, vgCoeff);
 511     DECLARE_ALIGNED(8, uint64_t, ugCoeff);
 512     DECLARE_ALIGNED(8, uint64_t, yOffset);
 513     DECLARE_ALIGNED(8, uint64_t, uOffset);
 514     DECLARE_ALIGNED(8, uint64_t, vOffset);
 515     int32_t lumMmxFilter[4 * MAX_FILTER_SIZE];
 516     int32_t chrMmxFilter[4 * MAX_FILTER_SIZE];
 517     int dstW_mmx;
 518     DECLARE_ALIGNED(8, uint64_t, esp);
 519     DECLARE_ALIGNED(8, uint64_t, vRounder);
 520     DECLARE_ALIGNED(8, uint64_t, u_temp);
 521     DECLARE_ALIGNED(8, uint64_t, v_temp);
 522     DECLARE_ALIGNED(8, uint64_t, y_temp);
 523     int32_t alpMmxFilter[4 * MAX_FILTER_SIZE];
 524     // alignment of these values is not necessary, but merely here
 525     // to maintain the same offset across x86-32 and x86-64. Once we
 526     // use proper offset macros in the asm, they can be removed.
 527     DECLARE_ALIGNED(8, ptrdiff_t, uv_off); ///< offset (in pixels) between u and v planes
 528     DECLARE_ALIGNED(8, ptrdiff_t, uv_offx2); ///< offset (in bytes) between u and v planes
 529     DECLARE_ALIGNED(8, uint16_t, dither16)[8];
 530     DECLARE_ALIGNED(8, uint32_t, dither32)[8];
 531
 532     const uint8_t *chrDither8, *lumDither8;
 533
 534 #if HAVE_ALTIVEC
 535     vector signed short   CY;
 536     vector signed short   CRV;
 537     vector signed short   CBU;
 538     vector signed short   CGU;
 539     vector signed short   CGV;
 540     vector signed short   OY;
 541     vector unsigned short CSHIFT;
 542     vector signed short  *vYCoeffsBank, *vCCoeffsBank;
 543 #endif
 544
 545     int use_mmx_vfilter;
 546
 547 /* pre defined color-spaces gamma */
 548 #define XYZ_GAMMA (2.6f)
 549 #define RGB_GAMMA (2.2f)
 550     uint16_t *xyzgamma;
 551     uint16_t *rgbgamma;
 552     uint16_t *xyzgammainv;
 553     uint16_t *rgbgammainv;
 554     int16_t xyz2rgb_matrix[3][4];
 555     int16_t rgb2xyz_matrix[3][4];
 556
 557     /* function pointers for swscale() */
 558     yuv2planar1_fn yuv2plane1;
 559     yuv2planarX_fn yuv2planeX;
 560     yuv2interleavedX_fn yuv2nv12cX;
 561     yuv2packed1_fn yuv2packed1;
 562     yuv2packed2_fn yuv2packed2;
 563     yuv2packedX_fn yuv2packedX;
 564     yuv2anyX_fn yuv2anyX;
 565
 566     /// Opaque data pointer passed to all input functions.
 567     void *input_opaque;
 568
 569     planar1_YV12_fn lumToYV12;
 570     planar1_YV12_fn alpToYV12;
 571     planar2_YV12_fn chrToYV12;
 572
 573     /**
 574      * Functions to read planar input, such as planar RGB, and convert
 575      * internally to Y/UV/A.
 576      */
 577     /** @{ */
 578     planarX_YV12_fn  readLumPlanar;
 579     planarX_YV12_fn  readAlpPlanar;
 580     planarX2_YV12_fn readChrPlanar;
 581     /** @} */
 582
 583     /**
 584      * Scale one horizontal line of input data using a bilinear filter
 585      * to produce one line of output data. Compared to SwsInternal->hScale(),
 586      * please take note of the following caveats when using these:
 587      * - Scaling is done using only 7 bits instead of 14-bit coefficients.
 588      * - You can use no more than 5 input pixels to produce 4 output
 589      *   pixels. Therefore, this filter should not be used for downscaling
 590      *   by more than ~20% in width (because that equals more than 5/4th
 591      *   downscaling and thus more than 5 pixels input per 4 pixels output).
 592      * - In general, bilinear filters create artifacts during downscaling
 593      *   (even when <20%), because one output pixel will span more than one
 594      *   input pixel, and thus some pixels will need edges of both neighbor
 595      *   pixels to interpolate the output pixel. Since you can use at most
 596      *   two input pixels per output pixel in bilinear scaling, this is
 597      *   impossible and thus downscaling by any size will create artifacts.
 598      * To enable this type of scaling, set SWS_FLAG_FAST_BILINEAR
 599      * in SwsInternal->flags.
 600      */
 601     /** @{ */
 602     void (*hyscale_fast)(SwsInternal *c,
 603                          int16_t *dst, int dstWidth,
 604                          const uint8_t *src, int srcW, int xInc);
 605     void (*hcscale_fast)(SwsInternal *c,
 606                          int16_t *dst1, int16_t *dst2, int dstWidth,
 607                          const uint8_t *src1, const uint8_t *src2,
 608                          int srcW, int xInc);
 609     /** @} */
 610
 611     /**
 612      * Scale one horizontal line of input data using a filter over the input
 613      * lines, to produce one (differently sized) line of output data.
 614      *
 615      * @param dst        pointer to destination buffer for horizontally scaled
 616      *                   data. If the number of bits per component of one
 617      *                   destination pixel (SwsInternal->dstBpc) is <= 10, data
 618      *                   will be 15 bpc in 16 bits (int16_t) width. Else (i.e.
 619      *                   SwsInternal->dstBpc == 16), data will be 19bpc in
 620      *                   32 bits (int32_t) width.
 621      * @param dstW       width of destination image
 622      * @param src        pointer to source data to be scaled. If the number of
 623      *                   bits per component of a source pixel (SwsInternal->srcBpc)
 624      *                   is 8, this is 8bpc in 8 bits (uint8_t) width. Else
 625      *                   (i.e. SwsInternal->srcBpc > 8), this is native depth
 626      *                   in 16 bits (uint16_t) width. In other words, for 9-bit
 627      *                   YUV input, this is 9bpc, for 10-bit YUV input, this is
 628      *                   10bpc, and for 16-bit RGB or YUV, this is 16bpc.
 629      * @param filter     filter coefficients to be used per output pixel for
 630      *                   scaling. This contains 14bpp filtering coefficients.
 631      *                   Guaranteed to contain dstW * filterSize entries.
 632      * @param filterPos  position of the first input pixel to be used for
 633      *                   each output pixel during scaling. Guaranteed to
 634      *                   contain dstW entries.
 635      * @param filterSize the number of input coefficients to be used (and
 636      *                   thus the number of input pixels to be used) for
 637      *                   creating a single output pixel. Is aligned to 4
 638      *                   (and input coefficients thus padded with zeroes)
 639      *                   to simplify creating SIMD code.
 640      */
 641     /** @{ */
 642     void (*hyScale)(SwsInternal *c, int16_t *dst, int dstW,
 643                     const uint8_t *src, const int16_t *filter,
 644                     const int32_t *filterPos, int filterSize);
 645     void (*hcScale)(SwsInternal *c, int16_t *dst, int dstW,
 646                     const uint8_t *src, const int16_t *filter,
 647                     const int32_t *filterPos, int filterSize);
 648     /** @} */
 649
 650     /**
 651      * Color range conversion functions if needed.
     * If SwsInternal->dstBpc is <= 14:
     * - int16_t *dst (data is 15 bpc)
     * - uint16_t coeff
     * - int32_t offset
     * Otherwise (SwsInternal->dstBpc is > 14):
     * - int32_t *dst (data is 19 bpc)
     * - uint32_t coeff
     * - int64_t offset
 660      */
 661     /** @{ */
 662     void (*lumConvertRange)(int16_t *dst, int width,
 663                             uint32_t coeff, int64_t offset);
 664     void (*chrConvertRange)(int16_t *dst1, int16_t *dst2, int width,
 665                             uint32_t coeff, int64_t offset);
 666     /** @} */
 667
 668     uint32_t lumConvertRange_coeff;
 669     uint32_t chrConvertRange_coeff;
 670     int64_t  lumConvertRange_offset;
 671     int64_t  chrConvertRange_offset;
 672
 673     int needs_hcscale; ///< Set if there are chroma planes to be converted.
 674
 675     // scratch buffer for converting packed rgb0 sources
 676     // filled with a copy of the input frame + fully opaque alpha,
 677     // then passed as input to further conversion
 678     uint8_t     *rgb0_scratch;
 679     unsigned int rgb0_scratch_allocated;
 680
 681     // scratch buffer for converting XYZ sources
 682     // filled with the input converted to rgb48
 683     // then passed as input to further conversion
 684     uint8_t     *xyz_scratch;
 685     unsigned int xyz_scratch_allocated;
 686
 687     unsigned int dst_slice_align;
 688     atomic_int   stride_unaligned_warned;
 689     atomic_int   data_unaligned_warned;
 690     int          color_conversion_warned;
 691
 692     Half2FloatTables *h2f_tables;
 693 };
 694 //FIXME check init (where 0)
 695
/* Compile-time layout check: the dither32 member must be located exactly
 * DITHER32_INT bytes after the redDither member of SwsInternal.
 * NOTE(review): presumably some code addresses dither32 relative to
 * redDither -- confirm before reordering members. */
static_assert(offsetof(SwsInternal, redDither) + DITHER32_INT == offsetof(SwsInternal, dither32),
              "dither32 must be at the same offset as redDither + DITHER32_INT");

#if ARCH_X86_64
/* The x86 yuv2gbrp code uses SwsInternal for the yuv coefficients;
   if struct offsets change, the asm needs to be updated too. */
static_assert(offsetof(SwsInternal, yuv2rgb_y_offset) == 40348,
              "yuv2rgb_y_offset must be updated in x86 asm");
#endif
 705
/*
 * YUV -> RGB and range-conversion entry points. Only the prototypes are
 * visible here; the implementations live in other translation units.
 * NOTE(review): the per-function notes below are inferred from the names and
 * signatures -- confirm against the implementations.
 */

/* Presumably selects a YUV->RGB conversion routine for the given context. */
SwsFunc ff_yuv2rgb_get_func_ptr(SwsInternal *c);
/* Presumably builds the C YUV->RGB lookup tables from a 4-entry coefficient
 * table plus range/brightness/contrast/saturation settings; returns int. */
int ff_yuv2rgb_c_init_tables(SwsInternal *c, const int inv_table[4],
                             int fullRange, int brightness,
                             int contrast, int saturation);
/* PowerPC counterpart of the table setup (no fullRange argument, no return value). */
void ff_yuv2rgb_init_tables_ppc(SwsInternal *c, const int inv_table[4],
                                int brightness, int contrast, int saturation);

void ff_updateMMXDitherTables(SwsInternal *c, int dstY);

void ff_update_palette(SwsInternal *c, const uint32_t *pal);

/* Range-conversion initialisers: one generic entry point followed by
 * per-architecture variants (aarch64, loongarch, riscv, x86). */
av_cold void ff_sws_init_range_convert(SwsInternal *c);
av_cold void ff_sws_init_range_convert_aarch64(SwsInternal *c);
av_cold void ff_sws_init_range_convert_loongarch(SwsInternal *c);
av_cold void ff_sws_init_range_convert_riscv(SwsInternal *c);
av_cold void ff_sws_init_range_convert_x86(SwsInternal *c);

/* Per-architecture YUV->RGB initialisers, each returning a SwsFunc. */
SwsFunc ff_yuv2rgb_init_x86(SwsInternal *c);
SwsFunc ff_yuv2rgb_init_ppc(SwsInternal *c);
SwsFunc ff_yuv2rgb_init_loongarch(SwsInternal *c);
 726
 727 static av_always_inline int is16BPS(enum AVPixelFormat pix_fmt)
 728 {
 729     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
 730     av_assert0(desc);
 731     return desc->comp[0].depth == 16;
 732 }
 733
 734 static av_always_inline int is32BPS(enum AVPixelFormat pix_fmt)
 735 {
 736     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
 737     av_assert0(desc);
 738     return desc->comp[0].depth == 32;
 739 }
 740
 741 static av_always_inline int isNBPS(enum AVPixelFormat pix_fmt)
 742 {
 743     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
 744     av_assert0(desc);
 745     return desc->comp[0].depth >= 9 && desc->comp[0].depth <= 14;
 746 }
 747
 748 static av_always_inline int isBE(enum AVPixelFormat pix_fmt)
 749 {
 750     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
 751     av_assert0(desc);
 752     return desc->flags & AV_PIX_FMT_FLAG_BE;
 753 }
 754
 755 static av_always_inline int isYUV(enum AVPixelFormat pix_fmt)
 756 {
 757     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
 758     av_assert0(desc);
 759     return !(desc->flags & AV_PIX_FMT_FLAG_RGB) && desc->nb_components >= 2;
 760 }
 761
 762 static av_always_inline int isPlanarYUV(enum AVPixelFormat pix_fmt)
 763 {
 764     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
 765     av_assert0(desc);
 766     return ((desc->flags & AV_PIX_FMT_FLAG_PLANAR) && isYUV(pix_fmt));
 767 }
 768
 769 /*
 770  * Identity semi-planar YUV formats. Specifically, those are YUV formats
 771  * where the second and third components (U & V) are on the same plane.
 772  */
 773 static av_always_inline int isSemiPlanarYUV(enum AVPixelFormat pix_fmt)
 774 {
 775     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
 776     av_assert0(desc);
 777     return (isPlanarYUV(pix_fmt) && desc->comp[1].plane == desc->comp[2].plane);
 778 }
 779
 780 static av_always_inline int isRGB(enum AVPixelFormat pix_fmt)
 781 {
 782     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
 783     av_assert0(desc);
 784     return (desc->flags & AV_PIX_FMT_FLAG_RGB);
 785 }
 786
 787 static av_always_inline int isGray(enum AVPixelFormat pix_fmt)
 788 {
 789     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
 790     av_assert0(desc);
 791     return !(desc->flags & AV_PIX_FMT_FLAG_PAL) &&
 792            !(desc->flags & AV_PIX_FMT_FLAG_HWACCEL) &&
 793            desc->nb_components <= 2 &&
 794            pix_fmt != AV_PIX_FMT_MONOBLACK &&
 795            pix_fmt != AV_PIX_FMT_MONOWHITE;
 796 }
 797
 798 static av_always_inline int isRGBinInt(enum AVPixelFormat pix_fmt)
 799 {
 800     return pix_fmt == AV_PIX_FMT_RGB48BE     ||
 801            pix_fmt == AV_PIX_FMT_RGB48LE     ||
 802            pix_fmt == AV_PIX_FMT_RGB32       ||
 803            pix_fmt == AV_PIX_FMT_RGB32_1     ||
 804            pix_fmt == AV_PIX_FMT_RGB24       ||
 805            pix_fmt == AV_PIX_FMT_RGB565BE    ||
 806            pix_fmt == AV_PIX_FMT_RGB565LE    ||
 807            pix_fmt == AV_PIX_FMT_RGB555BE    ||
 808            pix_fmt == AV_PIX_FMT_RGB555LE    ||
 809            pix_fmt == AV_PIX_FMT_RGB444BE    ||
 810            pix_fmt == AV_PIX_FMT_RGB444LE    ||
 811            pix_fmt == AV_PIX_FMT_RGB8        ||
 812            pix_fmt == AV_PIX_FMT_RGB4        ||
 813            pix_fmt == AV_PIX_FMT_RGB4_BYTE   ||
 814            pix_fmt == AV_PIX_FMT_RGBA64BE    ||
 815            pix_fmt == AV_PIX_FMT_RGBA64LE    ||
 816            pix_fmt == AV_PIX_FMT_MONOBLACK   ||
 817            pix_fmt == AV_PIX_FMT_MONOWHITE;
 818 }
 819
 820 static av_always_inline int isBGRinInt(enum AVPixelFormat pix_fmt)
 821 {
 822     return pix_fmt == AV_PIX_FMT_BGR48BE     ||
 823            pix_fmt == AV_PIX_FMT_BGR48LE     ||
 824            pix_fmt == AV_PIX_FMT_BGR32       ||
 825            pix_fmt == AV_PIX_FMT_BGR32_1     ||
 826            pix_fmt == AV_PIX_FMT_BGR24       ||
 827            pix_fmt == AV_PIX_FMT_BGR565BE    ||
 828            pix_fmt == AV_PIX_FMT_BGR565LE    ||
 829            pix_fmt == AV_PIX_FMT_BGR555BE    ||
 830            pix_fmt == AV_PIX_FMT_BGR555LE    ||
 831            pix_fmt == AV_PIX_FMT_BGR444BE    ||
 832            pix_fmt == AV_PIX_FMT_BGR444LE    ||
 833            pix_fmt == AV_PIX_FMT_BGR8        ||
 834            pix_fmt == AV_PIX_FMT_BGR4        ||
 835            pix_fmt == AV_PIX_FMT_BGR4_BYTE   ||
 836            pix_fmt == AV_PIX_FMT_BGRA64BE    ||
 837            pix_fmt == AV_PIX_FMT_BGRA64LE    ||
 838            pix_fmt == AV_PIX_FMT_MONOBLACK   ||
 839            pix_fmt == AV_PIX_FMT_MONOWHITE;
 840 }
 841
 842 static av_always_inline int isBayer(enum AVPixelFormat pix_fmt)
 843 {
 844     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
 845     av_assert0(desc);
 846     return !!(desc->flags & AV_PIX_FMT_FLAG_BAYER);
 847 }
 848
 849 static av_always_inline int isBayer16BPS(enum AVPixelFormat pix_fmt)
 850 {
 851     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
 852     av_assert0(desc);
 853     return desc->comp[1].depth == 8;
 854 }
 855
 856 static av_always_inline int isAnyRGB(enum AVPixelFormat pix_fmt)
 857 {
 858     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
 859     av_assert0(desc);
 860     return (desc->flags & AV_PIX_FMT_FLAG_RGB) ||
 861             pix_fmt == AV_PIX_FMT_MONOBLACK || pix_fmt == AV_PIX_FMT_MONOWHITE;
 862 }
 863
 864 static av_always_inline int isFloat(enum AVPixelFormat pix_fmt)
 865 {
 866     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
 867     av_assert0(desc);
 868     return desc->flags & AV_PIX_FMT_FLAG_FLOAT;
 869 }
 870
 871 static av_always_inline int isFloat16(enum AVPixelFormat pix_fmt)
 872 {
 873     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
 874     av_assert0(desc);
 875     return (desc->flags & AV_PIX_FMT_FLAG_FLOAT) && desc->comp[0].depth == 16;
 876 }
 877
 878 static av_always_inline int isALPHA(enum AVPixelFormat pix_fmt)
 879 {
 880     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
 881     av_assert0(desc);
 882     if (pix_fmt == AV_PIX_FMT_PAL8)
 883         return 1;
 884     return desc->flags & AV_PIX_FMT_FLAG_ALPHA;
 885 }
 886
 887 static av_always_inline int isPacked(enum AVPixelFormat pix_fmt)
 888 {
 889     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
 890     av_assert0(desc);
 891     return (desc->nb_components >= 2 && !(desc->flags & AV_PIX_FMT_FLAG_PLANAR)) ||
 892             pix_fmt == AV_PIX_FMT_PAL8 ||
 893             pix_fmt == AV_PIX_FMT_MONOBLACK || pix_fmt == AV_PIX_FMT_MONOWHITE;
 894 }
 895
 896 static av_always_inline int isPlanar(enum AVPixelFormat pix_fmt)
 897 {
 898     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
 899     av_assert0(desc);
 900     return (desc->nb_components >= 2 && (desc->flags & AV_PIX_FMT_FLAG_PLANAR));
 901 }
 902
 903 static av_always_inline int isPackedRGB(enum AVPixelFormat pix_fmt)
 904 {
 905     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
 906     av_assert0(desc);
 907     return ((desc->flags & (AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_RGB)) == AV_PIX_FMT_FLAG_RGB);
 908 }
 909
 910 static av_always_inline int isPlanarRGB(enum AVPixelFormat pix_fmt)
 911 {
 912     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
 913     av_assert0(desc);
 914     return ((desc->flags & (AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_RGB)) ==
 915             (AV_PIX_FMT_FLAG_PLANAR | AV_PIX_FMT_FLAG_RGB));
 916 }
 917
 918 static av_always_inline int usePal(enum AVPixelFormat pix_fmt)
 919 {
 920     switch (pix_fmt) {
 921     case AV_PIX_FMT_PAL8:
 922     case AV_PIX_FMT_BGR4_BYTE:
 923     case AV_PIX_FMT_BGR8:
 924     case AV_PIX_FMT_GRAY8:
 925     case AV_PIX_FMT_RGB4_BYTE:
 926     case AV_PIX_FMT_RGB8:
 927         return 1;
 928     default:
 929         return 0;
 930     }
 931 }
 932
 933 /*
 934  * Identity formats where the data is in the high bits, and the low bits are shifted away.
 935  */
 936 static av_always_inline int isDataInHighBits(enum AVPixelFormat pix_fmt)
 937 {
 938     int i;
 939     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
 940     av_assert0(desc);
 941     if (desc->flags & (AV_PIX_FMT_FLAG_BITSTREAM | AV_PIX_FMT_FLAG_HWACCEL))
 942         return 0;
 943     for (i = 0; i < desc->nb_components; i++) {
 944         if (!desc->comp[i].shift)
 945             return 0;
 946         if ((desc->comp[i].shift + desc->comp[i].depth) & 0x7)
 947             return 0;
 948     }
 949     return 1;
 950 }
 951
 952 /*
 953  * Identity formats where the chroma planes are swapped (CrCb order).
 954  */
 955 static av_always_inline int isSwappedChroma(enum AVPixelFormat pix_fmt)
 956 {
 957     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
 958     av_assert0(desc);
 959     if (!isYUV(pix_fmt))
 960         return 0;
 961     if ((desc->flags & AV_PIX_FMT_FLAG_ALPHA) && desc->nb_components < 4)
 962         return 0;
 963     if (desc->nb_components < 3)
 964         return 0;
 965     if (!isPlanarYUV(pix_fmt) || isSemiPlanarYUV(pix_fmt))
 966         return desc->comp[1].offset > desc->comp[2].offset;
 967     else
 968         return desc->comp[1].plane > desc->comp[2].plane;
 969 }
 970
/*
 * Constant tables declared here and defined in another translation unit.
 * NOTE(review): the names suggest dither patterns and YUV->RGB coefficient
 * sets; the array dimensions below are part of the declaration and must
 * match the definitions.
 */
extern const uint64_t ff_dither4[2];
extern const uint64_t ff_dither8[2];

extern const uint8_t ff_dither_2x2_4[3][8];
extern const uint8_t ff_dither_2x2_8[3][8];
extern const uint8_t ff_dither_4x4_16[5][8];
extern const uint8_t ff_dither_8x8_32[9][8];
extern const uint8_t ff_dither_8x8_73[9][8];
extern const uint8_t ff_dither_8x8_128[9][8];
extern const uint8_t ff_dither_8x8_220[9][8];

extern const int32_t ff_yuv2rgb_coeffs[11][4];

/* AVClass instance for the scaler context, defined elsewhere. */
extern const AVClass ff_sws_context_class;
 985
/* NOTE(review): presumably initialises one scaler context with optional
 * source/destination filters; implementation is outside this header. */
int ff_sws_init_single_context(SwsContext *sws, SwsFilter *srcFilter,
                               SwsFilter *dstFilter);

/**
 * Set c->convert_unscaled to an unscaled converter if one exists for the
 * specific source and destination formats, bit depths, flags, etc.
 *
 * The _ppc, _arm and _aarch64 variants share the same signature;
 * presumably they are the architecture-specific counterparts.
 */
void ff_get_unscaled_swscale(SwsInternal *c);
void ff_get_unscaled_swscale_ppc(SwsInternal *c);
void ff_get_unscaled_swscale_arm(SwsInternal *c);
void ff_get_unscaled_swscale_aarch64(SwsInternal *c);
 997
/*
 * Scaler function-pointer setup. Only prototypes are visible here.
 * NOTE(review): descriptions are inferred from names and signatures --
 * confirm against the implementations.
 */
void ff_sws_init_scale(SwsInternal *c);

/* Takes pointers to the input-side function pointers (luma, alpha and chroma
 * readers, packed and planar); presumably fills them in for the context. */
void ff_sws_init_input_funcs(SwsInternal *c,
                             planar1_YV12_fn *lumToYV12,
                             planar1_YV12_fn *alpToYV12,
                             planar2_YV12_fn *chrToYV12,
                             planarX_YV12_fn *readLumPlanar,
                             planarX_YV12_fn *readAlpPlanar,
                             planarX2_YV12_fn *readChrPlanar);
/* Takes pointers to the output-side function pointers; presumably fills
 * them in for the context. */
void ff_sws_init_output_funcs(SwsInternal *c,
                              yuv2planar1_fn *yuv2plane1,
                              yuv2planarX_fn *yuv2planeX,
                              yuv2interleavedX_fn *yuv2nv12cX,
                              yuv2packed1_fn *yuv2packed1,
                              yuv2packed2_fn *yuv2packed2,
                              yuv2packedX_fn *yuv2packedX,
                              yuv2anyX_fn *yuv2anyX);
/* Per-architecture initialisers, all taking only the context. */
void ff_sws_init_swscale_ppc(SwsInternal *c);
void ff_sws_init_swscale_vsx(SwsInternal *c);
void ff_sws_init_swscale_x86(SwsInternal *c);
void ff_sws_init_swscale_aarch64(SwsInternal *c);
void ff_sws_init_swscale_arm(SwsInternal *c);
void ff_sws_init_swscale_loongarch(SwsInternal *c);
void ff_sws_init_swscale_riscv(SwsInternal *c);
1022
/*
 * Fast horizontal scalers. The _c functions have the same signatures as the
 * hyscale_fast / hcscale_fast function pointers in SwsInternal; the _mmxext
 * functions are the correspondingly named MMXEXT variants.
 */
void ff_hyscale_fast_c(SwsInternal *c, int16_t *dst, int dstWidth,
                       const uint8_t *src, int srcW, int xInc);
void ff_hcscale_fast_c(SwsInternal *c, int16_t *dst1, int16_t *dst2,
                       int dstWidth, const uint8_t *src1,
                       const uint8_t *src2, int srcW, int xInc);
/* NOTE(review): presumably prepares filterCode/filter/filterPos for the
 * MMXEXT fast scaler; meaning of the return value is not visible here. */
int ff_init_hscaler_mmxext(int dstW, int xInc, uint8_t *filterCode,
                           int16_t *filter, int32_t *filterPos,
                           int numSplits);
void ff_hyscale_fast_mmxext(SwsInternal *c, int16_t *dst,
                            int dstWidth, const uint8_t *src,
                            int srcW, int xInc);
void ff_hcscale_fast_mmxext(SwsInternal *c, int16_t *dst1, int16_t *dst2,
                            int dstWidth, const uint8_t *src1,
                            const uint8_t *src2, int srcW, int xInc);
1037
/* NOTE(review): the notes on these helpers are inferred from their names and
 * signatures; the implementations are outside this header. */

/* Slice-style entry point (source planes/strides, slice position and height,
 * destination planes/strides); presumably blends the alpha channel away. */
int ff_sws_alphablendaway(SwsInternal *c, const uint8_t *const src[],
                          const int srcStride[], int srcSliceY, int srcSliceH,
                          uint8_t *const dst[], const int dstStride[]);

/* Presumably copies srcSliceH lines of one plane starting at srcSliceY. */
void ff_copyPlane(const uint8_t *src, int srcStride,
                  int srcSliceY, int srcSliceH, int width,
                  uint8_t *dst, int dstStride);

/* Presumably converts a w x h image from XYZ12 to RGB48. */
void ff_xyz12Torgb48(const SwsInternal *c, uint8_t *dst, int dst_stride,
                     const uint8_t *src, int src_stride, int w, int h);

/* Presumably converts a w x h image from RGB48 to XYZ12. */
void ff_rgb48Toxyz12(const SwsInternal *c, uint8_t *dst, int dst_stride,
                     const uint8_t *src, int src_stride, int w, int h);
1051
1052 static inline void fillPlane16(uint8_t *plane, int stride, int width, int height, int y,
1053                                int alpha, int bits, const int big_endian)
1054 {
1055     uint8_t *ptr = plane + stride * y;
1056     int v = alpha ? 0xFFFF>>(16-bits) : (1<<(bits-1));
1057     if (big_endian != HAVE_BIGENDIAN)
1058         v = av_bswap16(v);
1059     for (int i = 0; i < height; i++) {
1060         for (int j = 0; j < width; j++)
1061             AV_WN16(ptr + 2 * j, v);
1062         ptr += stride;
1063     }
1064 }
1065
1066 static inline void fillPlane32(uint8_t *plane, int stride, int width, int height, int y,
1067                                int alpha, int bits, const int big_endian, int is_float)
1068 {
1069     uint8_t *ptr = plane + stride * y;
1070     uint32_t v;
1071     uint32_t onef32 = 0x3f800000;
1072     if (is_float)
1073         v = alpha ? onef32 : 0;
1074     else
1075         v = alpha ? 0xFFFFFFFF>>(32-bits) : (1<<(bits-1));
1076     if (big_endian != HAVE_BIGENDIAN)
1077         v = av_bswap32(v);
1078
1079     for (int i = 0; i < height; i++) {
1080         for (int j = 0; j < width; j++)
1081             AV_WN32(ptr + 4 * j, v);
1082         ptr += stride;
1083     }
1084 }
1085
1086
/// Number of SwsPlane entries in each SwsSlice
#define MAX_SLICE_PLANES 4

/// Slice plane
typedef struct SwsPlane
{
    int available_lines;    ///< max number of lines that can be held by this plane
    int sliceY;             ///< index of first line
    int sliceH;             ///< number of lines
    uint8_t **line;         ///< line buffer
    uint8_t **tmp;          ///< Tmp line buffer used by mmx code
} SwsPlane;
1098
/**
 * Struct which defines a slice of an image to be scaled or an output for
 * a scaled slice.
 * A slice can also be used as an intermediate ring buffer for scaling steps.
 */
typedef struct SwsSlice
{
    int width;              ///< Slice line width
    int h_chr_sub_sample;   ///< horizontal chroma subsampling factor
    int v_chr_sub_sample;   ///< vertical chroma subsampling factor
    int is_ring;            ///< flag to identify if this slice is a ring buffer
    int should_free_lines;  ///< flag to identify if there are dynamically allocated lines
    enum AVPixelFormat fmt; ///< pixel format of the planes
    SwsPlane plane[MAX_SLICE_PLANES];   ///< color planes
} SwsSlice;
1114
/**
 * Struct which holds all necessary data for processing a slice.
 * A processing step can be a color conversion or horizontal/vertical scaling.
 */
typedef struct SwsFilterDescriptor
{
    SwsSlice *src;  ///< Source slice
    SwsSlice *dst;  ///< Output slice

    int alpha;      ///< Flag for processing alpha channel
    void *instance; ///< Filter instance data

    /// Function for processing sliceH lines of the input slice, starting from line sliceY
    int (*process)(SwsInternal *c, struct SwsFilterDescriptor *desc, int sliceY, int sliceH);
} SwsFilterDescriptor;
1130
// wrap input lines in the form (src + width*i + j) to slice format (line[i][j])
// relative=true means first line src[x][0] otherwise first line is src[x][lum/chr Y]
int ff_init_slice_from_src(SwsSlice * s, uint8_t *const src[4], const int stride[4],
                           int srcW, int lumY, int lumH, int chrY, int chrH, int relative);

// Initialize scaler filter descriptor chain
int ff_init_filters(SwsInternal *c);

// Free all filter data
int ff_free_filters(SwsInternal *c);

/*
 Function for applying ring buffer logic to slice s.
 It checks if the slice can hold more @lum lines; if yes, it does
 nothing, otherwise it removes the @lum least used lines.
 It applies the same procedure for @chr lines.
*/
int ff_rotate_slice(SwsSlice *s, int lum, int chr);
1149
/// initializes gamma conversion descriptor
int ff_init_gamma_convert(SwsFilterDescriptor *desc, SwsSlice * src, uint16_t *table);

/// initializes lum pixel format conversion descriptor
int ff_init_desc_fmt_convert(SwsFilterDescriptor *desc, SwsSlice * src, SwsSlice *dst, uint32_t *pal);

/// initializes lum horizontal scaling descriptor
int ff_init_desc_hscale(SwsFilterDescriptor *desc, SwsSlice *src, SwsSlice *dst, uint16_t *filter, int * filter_pos, int filter_size, int xInc);

/// initializes chr pixel format conversion descriptor
int ff_init_desc_cfmt_convert(SwsFilterDescriptor *desc, SwsSlice * src, SwsSlice *dst, uint32_t *pal);

/// initializes chr horizontal scaling descriptor
int ff_init_desc_chscale(SwsFilterDescriptor *desc, SwsSlice *src, SwsSlice *dst, uint16_t *filter, int * filter_pos, int filter_size, int xInc);

/// NOTE(review): undocumented in the original; by its name, presumably
/// initializes a descriptor for the case without chroma -- confirm
int ff_init_desc_no_chr(SwsFilterDescriptor *desc, SwsSlice * src, SwsSlice *dst);

/// initializes vertical scaling descriptors
int ff_init_vscale(SwsInternal *c, SwsFilterDescriptor *desc, SwsSlice *src, SwsSlice *dst);
1169
/// setup vertical scaler functions
void ff_init_vscale_pfn(SwsInternal *c, yuv2planar1_fn yuv2plane1, yuv2planarX_fn yuv2planeX,
    yuv2interleavedX_fn yuv2nv12cX, yuv2packed1_fn yuv2packed1, yuv2packed2_fn yuv2packed2,
    yuv2packedX_fn yuv2packedX, yuv2anyX_fn yuv2anyX, int use_mmx);

/// NOTE(review): the job/thread index and count parameters suggest a
/// slice-threading worker callback -- confirm against the caller
void ff_sws_slice_worker(void *priv, int jobnr, int threadnr,
                         int nb_jobs, int nb_threads);

/// NOTE(review): presumably the internal scaling entry point: takes source
/// planes/strides with a source slice position and height, and destination
/// planes/strides with a destination slice position and height -- confirm
int ff_swscale(SwsInternal *c, const uint8_t *const src[], const int srcStride[],
               int srcSliceY, int srcSliceH, uint8_t *const dst[],
               const int dstStride[], int dstSliceY, int dstSliceH);
1181
1182
// number of extra lines to process
#define MAX_LINES_AHEAD 4

// shuffle filter and filterPos for the hyScale and hcScale filters in avx2
// NOTE(review): meaning of the int return value is not visible here
int ff_shuffle_filter_coefficients(SwsInternal *c, int* filterPos, int filterSize, int16_t *filter, int dstW);
1188 #endif /* SWSCALE_SWSCALE_INTERNAL_H */