libswscale/aarch64/swscale_unscaled.c

   1 /*
   2  * This file is part of FFmpeg.
   3  *
   4  * FFmpeg is free software; you can redistribute it and/or
   5  * modify it under the terms of the GNU Lesser General Public
   6  * License as published by the Free Software Foundation; either
   7  * version 2.1 of the License, or (at your option) any later version.
   8  *
   9  * FFmpeg is distributed in the hope that it will be useful,
  10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  12  * Lesser General Public License for more details.
  13  *
  14  * You should have received a copy of the GNU Lesser General Public
  15  * License along with FFmpeg; if not, write to the Free Software
  16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  17  */
  18
  19 #include "config.h"
  20 #include "libswscale/swscale.h"
  21 #include "libswscale/swscale_internal.h"
  22 #include "libavutil/aarch64/cpu.h"
  23
  24 #define YUV_TO_RGB_TABLE                                                                    \
  25         c->yuv2rgb_v2r_coeff,                                                               \
  26         c->yuv2rgb_u2g_coeff,                                                               \
  27         c->yuv2rgb_v2g_coeff,                                                               \
  28         c->yuv2rgb_u2b_coeff,                                                               \
  29
  30 #define DECLARE_FF_YUVX_TO_RGBX_FUNCS(ifmt, ofmt)                                           \
  31 int ff_##ifmt##_to_##ofmt##_neon(int w, int h,                                              \
  32                                  uint8_t *dst, int linesize,                                \
  33                                  const uint8_t *srcY, int linesizeY,                        \
  34                                  const uint8_t *srcU, int linesizeU,                        \
  35                                  const uint8_t *srcV, int linesizeV,                        \
  36                                  const int16_t *table,                                      \
  37                                  int y_offset,                                              \
  38                                  int y_coeff);                                              \
  39                                                                                             \
  40 static int ifmt##_to_##ofmt##_neon_wrapper(SwsInternal *c, const uint8_t *const src[],      \
  41                                            const int srcStride[], int srcSliceY,            \
  42                                            int srcSliceH, uint8_t *const dst[],             \
  43                                            const int dstStride[]) {                         \
  44     const int16_t yuv2rgb_table[] = { YUV_TO_RGB_TABLE };                                   \
  45                                                                                             \
  46     return ff_##ifmt##_to_##ofmt##_neon(c->opts.src_w, srcSliceH,                           \
  47                                         dst[0] + srcSliceY * dstStride[0], dstStride[0],    \
  48                                         src[0], srcStride[0],                               \
  49                                         src[1], srcStride[1],                               \
  50                                         src[2], srcStride[2],                               \
  51                                         yuv2rgb_table,                                      \
  52                                         c->yuv2rgb_y_offset >> 6,                           \
  53                                         c->yuv2rgb_y_coeff);                                \
  54 }                                                                                           \
  55
  56 #define DECLARE_FF_YUVX_TO_GBRP_FUNCS(ifmt, ofmt)                                           \
  57 int ff_##ifmt##_to_##ofmt##_neon(int w, int h,                                              \
  58                                  uint8_t *dst, int linesize,                                \
  59                                  const uint8_t *srcY, int linesizeY,                        \
  60                                  const uint8_t *srcU, int linesizeU,                        \
  61                                  const uint8_t *srcV, int linesizeV,                        \
  62                                  const int16_t *table,                                      \
  63                                  int y_offset,                                              \
  64                                  int y_coeff,                                               \
  65                                  uint8_t *dst1, int linesize1,                              \
  66                                  uint8_t *dst2, int linesize2);                             \
  67                                                                                             \
  68 static int ifmt##_to_##ofmt##_neon_wrapper(SwsInternal *c, const uint8_t *const src[],      \
  69                                            const int srcStride[], int srcSliceY,            \
  70                                            int srcSliceH, uint8_t *const dst[],             \
  71                                            const int dstStride[]) {                         \
  72     const int16_t yuv2rgb_table[] = { YUV_TO_RGB_TABLE };                                   \
  73                                                                                             \
  74     return ff_##ifmt##_to_##ofmt##_neon(c->opts.src_w, srcSliceH,                           \
  75                                         dst[0] + srcSliceY * dstStride[0], dstStride[0],    \
  76                                         src[0], srcStride[0],                               \
  77                                         src[1], srcStride[1],                               \
  78                                         src[2], srcStride[2],                               \
  79                                         yuv2rgb_table,                                      \
  80                                         c->yuv2rgb_y_offset >> 6,                           \
  81                                         c->yuv2rgb_y_coeff,                                 \
  82                                         dst[1] + srcSliceY * dstStride[1], dstStride[1],    \
  83                                         dst[2] + srcSliceY * dstStride[2], dstStride[2]);   \
  84 }                                                                                           \
  85
  86 #define DECLARE_FF_YUVX_TO_ALL_RGBX_FUNCS(yuvx)                                             \
  87 DECLARE_FF_YUVX_TO_RGBX_FUNCS(yuvx, argb)                                                   \
  88 DECLARE_FF_YUVX_TO_RGBX_FUNCS(yuvx, rgba)                                                   \
  89 DECLARE_FF_YUVX_TO_RGBX_FUNCS(yuvx, abgr)                                                   \
  90 DECLARE_FF_YUVX_TO_RGBX_FUNCS(yuvx, bgra)                                                   \
  91 DECLARE_FF_YUVX_TO_GBRP_FUNCS(yuvx, gbrp)                                                   \
  92
  93 DECLARE_FF_YUVX_TO_ALL_RGBX_FUNCS(yuv420p)
  94 DECLARE_FF_YUVX_TO_ALL_RGBX_FUNCS(yuv422p)
  95
  96 #define DECLARE_FF_NVX_TO_RGBX_FUNCS(ifmt, ofmt)                                            \
  97 int ff_##ifmt##_to_##ofmt##_neon(int w, int h,                                              \
  98                                  uint8_t *dst, int linesize,                                \
  99                                  const uint8_t *srcY, int linesizeY,                        \
 100                                  const uint8_t *srcC, int linesizeC,                        \
 101                                  const int16_t *table,                                      \
 102                                  int y_offset,                                              \
 103                                  int y_coeff);                                              \
 104                                                                                             \
 105 static int ifmt##_to_##ofmt##_neon_wrapper(SwsInternal *c, const uint8_t *const src[],      \
 106                                            const int srcStride[], int srcSliceY,            \
 107                                            int srcSliceH, uint8_t *const dst[],             \
 108                                            const int dstStride[]) {                         \
 109     const int16_t yuv2rgb_table[] = { YUV_TO_RGB_TABLE };                                   \
 110                                                                                             \
 111     return ff_##ifmt##_to_##ofmt##_neon(c->opts.src_w, srcSliceH,                           \
 112                                         dst[0] + srcSliceY * dstStride[0], dstStride[0],    \
 113                                         src[0], srcStride[0], src[1], srcStride[1],         \
 114                                         yuv2rgb_table,                                      \
 115                                         c->yuv2rgb_y_offset >> 6,                           \
 116                                         c->yuv2rgb_y_coeff);                                \
 117 }                                                                                           \
 118
 119 #define DECLARE_FF_NVX_TO_GBRP_FUNCS(ifmt, ofmt)                                            \
 120 int ff_##ifmt##_to_##ofmt##_neon(int w, int h,                                              \
 121                                  uint8_t *dst, int linesize,                                \
 122                                  const uint8_t *srcY, int linesizeY,                        \
 123                                  const uint8_t *srcC, int linesizeC,                        \
 124                                  const int16_t *table,                                      \
 125                                  int y_offset,                                              \
 126                                  int y_coeff,                                               \
 127                                  uint8_t *dst1, int linesize1,                              \
 128                                  uint8_t *dst2, int linesize2);                             \
 129                                                                                             \
 130 static int ifmt##_to_##ofmt##_neon_wrapper(SwsInternal *c, const uint8_t *const src[],      \
 131                                            const int srcStride[], int srcSliceY,            \
 132                                            int srcSliceH, uint8_t *const dst[],             \
 133                                            const int dstStride[]) {                         \
 134     const int16_t yuv2rgb_table[] = { YUV_TO_RGB_TABLE };                                   \
 135                                                                                             \
 136     return ff_##ifmt##_to_##ofmt##_neon(c->opts.src_w, srcSliceH,                           \
 137                                         dst[0] + srcSliceY * dstStride[0], dstStride[0],    \
 138                                         src[0], srcStride[0], src[1], srcStride[1],         \
 139                                         yuv2rgb_table,                                      \
 140                                         c->yuv2rgb_y_offset >> 6,                           \
 141                                         c->yuv2rgb_y_coeff,                                 \
 142                                         dst[1] + srcSliceY * dstStride[1], dstStride[1],    \
 143                                         dst[2] + srcSliceY * dstStride[2], dstStride[2]);   \
 144 }                                                                                           \
 145
 146 void ff_nv24_to_yuv420p_chroma_neon(uint8_t *dst1, int dstStride1,
 147                                     uint8_t *dst2, int dstStride2,
 148                                     const uint8_t *src, int srcStride,
 149                                     int w, int h);
 150
 151 static int nv24_to_yuv420p_neon_wrapper(SwsInternal *c, const uint8_t *const src[],
 152                                         const int srcStride[], int srcSliceY, int srcSliceH,
 153                                         uint8_t *const dst[], const int dstStride[])
 154 {
 155     uint8_t *dst1 = dst[1] + dstStride[1] * srcSliceY / 2;
 156     uint8_t *dst2 = dst[2] + dstStride[2] * srcSliceY / 2;
 157
 158     ff_copyPlane(src[0], srcStride[0], srcSliceY, srcSliceH, c->opts.src_w,
 159                  dst[0], dstStride[0]);
 160
 161     if (c->opts.src_format == AV_PIX_FMT_NV24)
 162         ff_nv24_to_yuv420p_chroma_neon(dst1, dstStride[1], dst2, dstStride[2],
 163                                        src[1], srcStride[1], c->opts.src_w / 2,
 164                                        srcSliceH);
 165     else
 166         ff_nv24_to_yuv420p_chroma_neon(dst2, dstStride[2], dst1, dstStride[1],
 167                                        src[1], srcStride[1], c->opts.src_w / 2,
 168                                        srcSliceH);
 169
 170     return srcSliceH;
 171 }
 172
 173 #define DECLARE_FF_NVX_TO_ALL_RGBX_FUNCS(nvx)                                               \
 174 DECLARE_FF_NVX_TO_RGBX_FUNCS(nvx, argb)                                                     \
 175 DECLARE_FF_NVX_TO_RGBX_FUNCS(nvx, rgba)                                                     \
 176 DECLARE_FF_NVX_TO_RGBX_FUNCS(nvx, abgr)                                                     \
 177 DECLARE_FF_NVX_TO_RGBX_FUNCS(nvx, bgra)                                                     \
 178 DECLARE_FF_NVX_TO_GBRP_FUNCS(nvx, gbrp)                                                     \
 179
 180 DECLARE_FF_NVX_TO_ALL_RGBX_FUNCS(nv12)
 181 DECLARE_FF_NVX_TO_ALL_RGBX_FUNCS(nv21)
 182
 183 /* We need a 16 pixel width alignment. This constraint can easily be removed
 184  * for input reading but for the output which is 4-bytes per pixel (RGBA) the
 185  * assembly might be writing as much as 4*15=60 extra bytes at the end of the
 186  * line, which won't fit the 32-bytes buffer alignment. */
 187 #define SET_FF_NVX_TO_RGBX_FUNC(ifmt, IFMT, ofmt, OFMT, accurate_rnd) do {                  \
 188     if (c->opts.src_format == AV_PIX_FMT_##IFMT                                             \
 189         && c->opts.dst_format == AV_PIX_FMT_##OFMT                                          \
 190         && !(c->opts.src_h & 1)                                                             \
 191         && !(c->opts.src_w & 15)                                                            \
 192         && !accurate_rnd)                                                                   \
 193         c->convert_unscaled = ifmt##_to_##ofmt##_neon_wrapper;                              \
 194 } while (0)
 195
 196 #define SET_FF_NVX_TO_ALL_RGBX_FUNC(nvx, NVX, accurate_rnd) do {                            \
 197     SET_FF_NVX_TO_RGBX_FUNC(nvx, NVX, argb, ARGB, accurate_rnd);                            \
 198     SET_FF_NVX_TO_RGBX_FUNC(nvx, NVX, rgba, RGBA, accurate_rnd);                            \
 199     SET_FF_NVX_TO_RGBX_FUNC(nvx, NVX, abgr, ABGR, accurate_rnd);                            \
 200     SET_FF_NVX_TO_RGBX_FUNC(nvx, NVX, bgra, BGRA, accurate_rnd);                            \
 201     SET_FF_NVX_TO_RGBX_FUNC(nvx, NVX, gbrp, GBRP, accurate_rnd);                            \
 202 } while (0)
 203
 204 static void get_unscaled_swscale_neon(SwsInternal *c) {
 205     int accurate_rnd = c->opts.flags & SWS_ACCURATE_RND;
 206
 207     SET_FF_NVX_TO_ALL_RGBX_FUNC(nv12, NV12, accurate_rnd);
 208     SET_FF_NVX_TO_ALL_RGBX_FUNC(nv21, NV21, accurate_rnd);
 209     SET_FF_NVX_TO_ALL_RGBX_FUNC(yuv420p, YUV420P, accurate_rnd);
 210     SET_FF_NVX_TO_ALL_RGBX_FUNC(yuv422p, YUV422P, accurate_rnd);
 211
 212     if (c->opts.dst_format == AV_PIX_FMT_YUV420P &&
 213         (c->opts.src_format == AV_PIX_FMT_NV24 || c->opts.src_format == AV_PIX_FMT_NV42) &&
 214         !(c->opts.src_h & 1) && !(c->opts.src_w & 15) && !accurate_rnd)
 215         c->convert_unscaled = nv24_to_yuv420p_neon_wrapper;
 216 }
 217
 218 void ff_get_unscaled_swscale_aarch64(SwsInternal *c)
 219 {
 220     int cpu_flags = av_get_cpu_flags();
 221     if (have_neon(cpu_flags))
 222         get_unscaled_swscale_neon(c);
 223 }