tests/checkasm/float_dsp.c

   1 /*
   2  * This file is part of FFmpeg.
   3  *
   4  * FFmpeg is free software; you can redistribute it and/or modify
   5  * it under the terms of the GNU General Public License as published by
   6  * the Free Software Foundation; either version 2 of the License, or
   7  * (at your option) any later version.
   8  *
   9  * FFmpeg is distributed in the hope that it will be useful,
  10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12  * GNU General Public License for more details.
  13  *
  14  * You should have received a copy of the GNU General Public License along
  15  * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
  16  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  17  */
  18
  19 #include <float.h>
  20 #include <stdint.h>
  21
  22 #include "libavutil/float_dsp.h"
  23 #include "libavutil/internal.h"
  24 #include "libavutil/mem.h"
  25 #include "libavutil/mem_internal.h"
  26
  27 #include "checkasm.h"
  28
  29 #define LEN 256
  30
  31 #define randomize_buffer(buf)                 \
  32 do {                                          \
  33     int i;                                    \
  34     double bmg[2], stddev = 10.0, mean = 0.0; \
  35                                               \
  36     for (i = 0; i < LEN; i += 2) {            \
  37         av_bmg_get(&checkasm_lfg, bmg);       \
  38         buf[i]     = bmg[0] * stddev + mean;  \
  39         buf[i + 1] = bmg[1] * stddev + mean;  \
  40     }                                         \
  41 } while(0);
  42
  43 static void test_vector_fmul(const float *src0, const float *src1)
  44 {
  45     LOCAL_ALIGNED_32(float, cdst, [LEN]);
  46     LOCAL_ALIGNED_32(float, odst, [LEN]);
  47     int i;
  48
  49     declare_func(void, float *dst, const float *src0, const float *src1,
  50                  int len);
  51
  52     call_ref(cdst, src0, src1, LEN);
  53     call_new(odst, src0, src1, LEN);
  54     for (i = 0; i < LEN; i++) {
  55         double t = fabs(src0[i]) + fabs(src1[i]) + fabs(src0[i] * src1[i]) + 1.0;
  56         if (!float_near_abs_eps(cdst[i], odst[i], t * 2 * FLT_EPSILON)) {
  57             fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
  58                     i, cdst[i], odst[i], cdst[i] - odst[i]);
  59             fail();
  60             break;
  61         }
  62     }
  63     bench_new(odst, src0, src1, LEN);
  64 }
  65
  66 static void test_vector_dmul(const double *src0, const double *src1)
  67 {
  68     LOCAL_ALIGNED_32(double, cdst, [LEN]);
  69     LOCAL_ALIGNED_32(double, odst, [LEN]);
  70     int i;
  71
  72     declare_func(void, double *dst, const double *src0, const double *src1,
  73                  int len);
  74
  75     call_ref(cdst, src0, src1, LEN);
  76     call_new(odst, src0, src1, LEN);
  77     for (i = 0; i < LEN; i++) {
  78         double t = fabs(src0[i]) + fabs(src1[i]) + fabs(src0[i] * src1[i]) + 1.0;
  79         if (!double_near_abs_eps(cdst[i], odst[i], t * 2 * DBL_EPSILON)) {
  80             fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
  81                     i, cdst[i], odst[i], cdst[i] - odst[i]);
  82             fail();
  83             break;
  84         }
  85     }
  86     bench_new(odst, src0, src1, LEN);
  87 }
  88
  89 #define ARBITRARY_FMUL_ADD_CONST 0.005
  90 static void test_vector_fmul_add(const float *src0, const float *src1, const float *src2)
  91 {
  92     LOCAL_ALIGNED_32(float, cdst, [LEN]);
  93     LOCAL_ALIGNED_32(float, odst, [LEN]);
  94     int i;
  95
  96     declare_func(void, float *dst, const float *src0, const float *src1,
  97                      const float *src2, int len);
  98
  99     call_ref(cdst, src0, src1, src2, LEN);
 100     call_new(odst, src0, src1, src2, LEN);
 101     for (i = 0; i < LEN; i++) {
 102         if (!float_near_abs_eps(cdst[i], odst[i], ARBITRARY_FMUL_ADD_CONST)) {
 103             fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
 104                     i, cdst[i], odst[i], cdst[i] - odst[i]);
 105             fail();
 106             break;
 107         }
 108     }
 109     bench_new(odst, src0, src1, src2, LEN);
 110 }
 111
 112 static void test_vector_fmul_scalar(const float *src0, const float *src1)
 113 {
 114     LOCAL_ALIGNED_16(float, cdst, [LEN]);
 115     LOCAL_ALIGNED_16(float, odst, [LEN]);
 116     int i;
 117
 118     declare_func(void, float *dst, const float *src, float mul, int len);
 119
 120     call_ref(cdst, src0, src1[0], LEN);
 121     call_new(odst, src0, src1[0], LEN);
 122         for (i = 0; i < LEN; i++) {
 123             double t = fabs(src0[i]) + fabs(src1[0]) + fabs(src0[i] * src1[0]) + 1.0;
 124             if (!float_near_abs_eps(cdst[i], odst[i], t * 2 * FLT_EPSILON)) {
 125                 fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
 126                         i, cdst[i], odst[i], cdst[i] - odst[i]);
 127                 fail();
 128                 break;
 129             }
 130         }
 131     bench_new(odst, src0, src1[0], LEN);
 132 }
 133
 134 #define ARBITRARY_FMUL_WINDOW_CONST 0.008
 135 static void test_vector_fmul_window(const float *src0, const float *src1, const float *win)
 136 {
 137     LOCAL_ALIGNED_16(float, cdst, [LEN]);
 138     LOCAL_ALIGNED_16(float, odst, [LEN]);
 139     int i;
 140
 141     declare_func(void, float *dst, const float *src0, const float *src1,
 142                  const float *win, int len);
 143
 144     call_ref(cdst, src0, src1, win, LEN / 2);
 145     call_new(odst, src0, src1, win, LEN / 2);
 146     for (i = 0; i < LEN; i++) {
 147         if (!float_near_abs_eps(cdst[i], odst[i], ARBITRARY_FMUL_WINDOW_CONST)) {
 148             fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
 149                     i, cdst[i], odst[i], cdst[i] - odst[i]);
 150             fail();
 151             break;
 152         }
 153     }
 154     bench_new(odst, src0, src1, win, LEN / 2);
 155 }
 156
 157 #define ARBITRARY_FMAC_SCALAR_CONST 0.005
 158 static void test_vector_fmac_scalar(const float *src0, const float *src1, const float *src2)
 159 {
 160     LOCAL_ALIGNED_32(float, cdst, [LEN]);
 161     LOCAL_ALIGNED_32(float, odst, [LEN]);
 162     int i;
 163
 164     declare_func(void, float *dst, const float *src, float mul, int len);
 165
 166     memcpy(cdst, src2, LEN * sizeof(*src2));
 167     memcpy(odst, src2, LEN * sizeof(*src2));
 168
 169     call_ref(cdst, src0, src1[0], LEN);
 170     call_new(odst, src0, src1[0], LEN);
 171     for (i = 0; i < LEN; i++) {
 172         if (!float_near_abs_eps(cdst[i], odst[i], ARBITRARY_FMAC_SCALAR_CONST)) {
 173             fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
 174                     i, cdst[i], odst[i], cdst[i] - odst[i]);
 175             fail();
 176             break;
 177         }
 178     }
 179     memcpy(odst, src2, LEN * sizeof(*src2));
 180     bench_new(odst, src0, src1[0], LEN);
 181 }
 182
 183 static void test_vector_dmul_scalar(const double *src0, const double *src1)
 184 {
 185     LOCAL_ALIGNED_32(double, cdst, [LEN]);
 186     LOCAL_ALIGNED_32(double, odst, [LEN]);
 187     int i;
 188
 189     declare_func(void, double *dst, const double *src, double mul, int len);
 190
 191     call_ref(cdst, src0, src1[0], LEN);
 192     call_new(odst, src0, src1[0], LEN);
 193     for (i = 0; i < LEN; i++) {
 194         double t = fabs(src1[0]) + fabs(src0[i]) + fabs(src1[0] * src0[i]) + 1.0;
 195         if (!double_near_abs_eps(cdst[i], odst[i], t * 2 * DBL_EPSILON)) {
 196             fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n", i,
 197                     cdst[i], odst[i], cdst[i] - odst[i]);
 198             fail();
 199             break;
 200         }
 201     }
 202     bench_new(odst, src0, src1[0], LEN);
 203 }
 204
 205 #define ARBITRARY_DMAC_SCALAR_CONST 0.005
 206 static void test_vector_dmac_scalar(const double *src0, const double *src1, const double *src2)
 207 {
 208     LOCAL_ALIGNED_32(double, cdst, [LEN]);
 209     LOCAL_ALIGNED_32(double, odst, [LEN]);
 210     int i;
 211
 212     declare_func(void, double *dst, const double *src, double mul, int len);
 213
 214     memcpy(cdst, src2, LEN * sizeof(*src2));
 215     memcpy(odst, src2, LEN * sizeof(*src2));
 216     call_ref(cdst, src0, src1[0], LEN);
 217     call_new(odst, src0, src1[0], LEN);
 218     for (i = 0; i < LEN; i++) {
 219         if (!double_near_abs_eps(cdst[i], odst[i], ARBITRARY_DMAC_SCALAR_CONST)) {
 220             fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
 221                     i, cdst[i], odst[i], cdst[i] - odst[i]);
 222             fail();
 223             break;
 224         }
 225     }
 226     memcpy(odst, src2, LEN * sizeof(*src2));
 227     bench_new(odst, src0, src1[0], LEN);
 228 }
 229
 230 static void test_butterflies_float(const float *src0, const float *src1)
 231 {
 232     LOCAL_ALIGNED_16(float,  cdst,  [LEN]);
 233     LOCAL_ALIGNED_16(float,  odst,  [LEN]);
 234     LOCAL_ALIGNED_16(float,  cdst1, [LEN]);
 235     LOCAL_ALIGNED_16(float,  odst1, [LEN]);
 236     int i;
 237
 238     declare_func(void, float *restrict src0, float *restrict src1,
 239     int len);
 240
 241     memcpy(cdst,  src0, LEN * sizeof(*src0));
 242     memcpy(cdst1, src1, LEN * sizeof(*src1));
 243     memcpy(odst,  src0, LEN * sizeof(*src0));
 244     memcpy(odst1, src1, LEN * sizeof(*src1));
 245
 246     call_ref(cdst, cdst1, LEN);
 247     call_new(odst, odst1, LEN);
 248     for (i = 0; i < LEN; i++) {
 249         if (!float_near_abs_eps(cdst[i],  odst[i],  FLT_EPSILON) ||
 250             !float_near_abs_eps(cdst1[i], odst1[i], FLT_EPSILON)) {
 251             fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
 252                     i, cdst[i], odst[i], cdst[i] - odst[i]);
 253             fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
 254                     i, cdst1[i], odst1[i], cdst1[i] - odst1[i]);
 255             fail();
 256             break;
 257         }
 258     }
 259     memcpy(odst,  src0, LEN * sizeof(*src0));
 260     memcpy(odst1, src1, LEN * sizeof(*src1));
 261     bench_new(odst, odst1, LEN);
 262 }
 263
 264 #define ARBITRARY_SCALARPRODUCT_CONST 0.2
 265 static void test_scalarproduct_float(const float *src0, const float *src1)
 266 {
 267     float cprod, oprod;
 268
 269     declare_func_float(float, const float *src0, const float *src1, int len);
 270
 271     cprod = call_ref(src0, src1, LEN);
 272     oprod = call_new(src0, src1, LEN);
 273     if (!float_near_abs_eps(cprod, oprod, ARBITRARY_SCALARPRODUCT_CONST)) {
 274         fprintf(stderr, "%- .12f - %- .12f = % .12g\n",
 275                 cprod, oprod, cprod - oprod);
 276         fail();
 277     }
 278     bench_new(src0, src1, LEN);
 279 }
 280
 281 static void test_scalarproduct_double(const double *src0, const double *src1)
 282 {
 283     double cprod, oprod;
 284
 285     declare_func_float(double, const double *, const double *, size_t);
 286
 287     cprod = call_ref(src0, src1, LEN);
 288     oprod = call_new(src0, src1, LEN);
 289     if (!double_near_abs_eps(cprod, oprod, ARBITRARY_SCALARPRODUCT_CONST)) {
 290         fprintf(stderr, "%- .12f - %- .12f = % .12g\n",
 291                 cprod, oprod, cprod - oprod);
 292         fail();
 293     }
 294     bench_new(src0, src1, LEN);
 295 }
 296
 297 void checkasm_check_float_dsp(void)
 298 {
 299     LOCAL_ALIGNED_32(float,  src0,     [LEN]);
 300     LOCAL_ALIGNED_32(float,  src1,     [LEN]);
 301     LOCAL_ALIGNED_32(float,  src2,     [LEN]);
 302     LOCAL_ALIGNED_16(float,  src3,     [LEN]);
 303     LOCAL_ALIGNED_16(float,  src4,     [LEN]);
 304     LOCAL_ALIGNED_16(float,  src5,     [LEN]);
 305     LOCAL_ALIGNED_32(double, dbl_src0, [LEN]);
 306     LOCAL_ALIGNED_32(double, dbl_src1, [LEN]);
 307     LOCAL_ALIGNED_32(double, dbl_src2, [LEN]);
 308     AVFloatDSPContext *fdsp = avpriv_float_dsp_alloc(1);
 309
 310     if (!fdsp) {
 311         fprintf(stderr, "floatdsp: Out of memory error\n");
 312         return;
 313     }
 314
 315     randomize_buffer(src0);
 316     randomize_buffer(src1);
 317     randomize_buffer(src2);
 318     randomize_buffer(src3);
 319     randomize_buffer(src4);
 320     randomize_buffer(src5);
 321     randomize_buffer(dbl_src0);
 322     randomize_buffer(dbl_src1);
 323     randomize_buffer(dbl_src2);
 324
 325     if (check_func(fdsp->vector_fmul, "vector_fmul"))
 326         test_vector_fmul(src0, src1);
 327     if (check_func(fdsp->vector_fmul_add, "vector_fmul_add"))
 328         test_vector_fmul_add(src0, src1, src2);
 329     if (check_func(fdsp->vector_fmul_scalar, "vector_fmul_scalar"))
 330         test_vector_fmul_scalar(src3, src4);
 331     if (check_func(fdsp->vector_fmul_reverse, "vector_fmul_reverse"))
 332         test_vector_fmul(src0, src1);
 333     if (check_func(fdsp->vector_fmul_window, "vector_fmul_window"))
 334         test_vector_fmul_window(src3, src4, src5);
 335     report("vector_fmul");
 336     if (check_func(fdsp->vector_fmac_scalar, "vector_fmac_scalar"))
 337         test_vector_fmac_scalar(src0, src1, src2);
 338     report("vector_fmac");
 339     if (check_func(fdsp->vector_dmul, "vector_dmul"))
 340         test_vector_dmul(dbl_src0, dbl_src1);
 341     if (check_func(fdsp->vector_dmul_scalar, "vector_dmul_scalar"))
 342         test_vector_dmul_scalar(dbl_src0, dbl_src1);
 343     report("vector_dmul");
 344     if (check_func(fdsp->vector_dmac_scalar, "vector_dmac_scalar"))
 345         test_vector_dmac_scalar(dbl_src0, dbl_src1, dbl_src2);
 346     report("vector_dmac");
 347     if (check_func(fdsp->butterflies_float, "butterflies_float"))
 348         test_butterflies_float(src3, src4);
 349     report("butterflies_float");
 350     if (check_func(fdsp->scalarproduct_float, "scalarproduct_float"))
 351         test_scalarproduct_float(src3, src4);
 352     report("scalarproduct_float");
 353     if (check_func(fdsp->scalarproduct_double, "scalarproduct_double"))
 354         test_scalarproduct_double(dbl_src0, dbl_src1);
 355     report("scalarproduct_double");
 356
 357     av_freep(&fdsp);
 358 }