source/modules/juce_dsp/native/juce_fallback_SIMDNativeOps.h

   1 /*
   2   ==============================================================================
   3
   4    This file is part of the JUCE library.
   5    Copyright (c) 2022 - Raw Material Software Limited
   6
   7    JUCE is an open source library subject to commercial or open-source
   8    licensing.
   9
  10    By using JUCE, you agree to the terms of both the JUCE 7 End-User License
  11    Agreement and JUCE Privacy Policy.
  12
  13    End User License Agreement: www.juce.com/juce-7-licence
  14    Privacy Policy: www.juce.com/juce-privacy-policy
  15
  16    Or: You may also use this code under the terms of the GPL v3 (see
  17    www.gnu.org/licenses).
  18
  19    JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
  20    EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
  21    DISCLAIMED.
  22
  23   ==============================================================================
  24 */
  25
  26 namespace juce
  27 {
  28 namespace dsp
  29 {
  30
  31 /** A template specialisation to find corresponding mask type for primitives. */
  32 namespace SIMDInternal
  33 {
  34     template <typename Primitive> struct MaskTypeFor        { using type = Primitive; };
  35     template <> struct MaskTypeFor <float>                  { using type = uint32_t; };
  36     template <> struct MaskTypeFor <double>                 { using type = uint64_t; };
  37     template <> struct MaskTypeFor <char>                   { using type = uint8_t; };
  38     template <> struct MaskTypeFor <int8_t>                 { using type = uint8_t; };
  39     template <> struct MaskTypeFor <int16_t>                { using type = uint16_t; };
  40     template <> struct MaskTypeFor <int32_t>                { using type = uint32_t; };
  41     template <> struct MaskTypeFor <int64_t>                { using type = uint64_t; };
  42     template <> struct MaskTypeFor <std::complex<float>>    { using type = uint32_t; };
  43     template <> struct MaskTypeFor <std::complex<double>>   { using type = uint64_t; };
  44
  45     template <typename Primitive> struct PrimitiveType                           { using type = typename std::remove_cv<Primitive>::type; };
  46     template <typename Primitive> struct PrimitiveType<std::complex<Primitive>>  { using type = typename std::remove_cv<Primitive>::type; };
  47
  48     template <int n>    struct Log2Helper    { enum { value = Log2Helper<n/2>::value + 1 }; };
  49     template <>         struct Log2Helper<1> { enum { value = 0 }; };
  50 }
  51
  52 /**
  53     Useful fallback routines to use if the native SIMD op is not supported. You
  54     should never need to use this directly. Use juce_SIMDRegister instead.
  55
  56     @tags{DSP}
  57 */
  58 template <typename ScalarType, typename vSIMDType>
  59 struct SIMDFallbackOps
  60 {
  61     static constexpr size_t n    =  sizeof (vSIMDType) / sizeof (ScalarType);
  62     static constexpr size_t mask = (sizeof (vSIMDType) / sizeof (ScalarType)) - 1;
  63     static constexpr size_t bits = SIMDInternal::Log2Helper<(int) n>::value;
  64
  65     // helper types
  66     using MaskType = typename SIMDInternal::MaskTypeFor<ScalarType>::type;
  67     union UnionType     { vSIMDType v; ScalarType s[n]; };
  68     union UnionMaskType { vSIMDType v; MaskType   m[n]; };
  69
  70
  71     // fallback methods
  72     static forcedinline vSIMDType add (vSIMDType a, vSIMDType b) noexcept        { return apply<ScalarAdd> (a, b); }
  73     static forcedinline vSIMDType sub (vSIMDType a, vSIMDType b) noexcept        { return apply<ScalarSub> (a, b); }
  74     static forcedinline vSIMDType mul (vSIMDType a, vSIMDType b) noexcept        { return apply<ScalarMul> (a, b); }
  75     static forcedinline vSIMDType bit_and (vSIMDType a, vSIMDType b) noexcept    { return bitapply<ScalarAnd> (a, b); }
  76     static forcedinline vSIMDType bit_or  (vSIMDType a, vSIMDType b) noexcept    { return bitapply<ScalarOr > (a, b); }
  77     static forcedinline vSIMDType bit_xor (vSIMDType a, vSIMDType b) noexcept    { return bitapply<ScalarXor> (a, b); }
  78     static forcedinline vSIMDType bit_notand (vSIMDType a, vSIMDType b) noexcept { return bitapply<ScalarNot> (a, b); }
  79
  80     static forcedinline vSIMDType min (vSIMDType a, vSIMDType b) noexcept                { return apply<ScalarMin> (a, b); }
  81     static forcedinline vSIMDType max (vSIMDType a, vSIMDType b) noexcept                { return apply<ScalarMax> (a, b); }
  82     static forcedinline vSIMDType equal (vSIMDType a, vSIMDType b) noexcept              { return cmp<ScalarEq > (a, b); }
  83     static forcedinline vSIMDType notEqual (vSIMDType a, vSIMDType b) noexcept           { return cmp<ScalarNeq> (a, b); }
  84     static forcedinline vSIMDType greaterThan (vSIMDType a, vSIMDType b) noexcept        { return cmp<ScalarGt > (a, b); }
  85     static forcedinline vSIMDType greaterThanOrEqual (vSIMDType a, vSIMDType b) noexcept { return cmp<ScalarGeq> (a, b); }
  86
  87     static forcedinline ScalarType get (vSIMDType v, size_t i) noexcept
  88     {
  89         UnionType u {v};
  90         return u.s[i];
  91     }
  92
  93     static forcedinline vSIMDType set (vSIMDType v, size_t i, ScalarType s) noexcept
  94     {
  95         UnionType u {v};
  96
  97         u.s[i] = s;
  98         return u.v;
  99     }
 100
 101     static forcedinline vSIMDType bit_not (vSIMDType av) noexcept
 102     {
 103         UnionMaskType a {av};
 104
 105         for (size_t i = 0; i < n; ++i)
 106             a.m[i] = ~a.m[i];
 107
 108         return a.v;
 109     }
 110
 111     static forcedinline ScalarType sum (vSIMDType av) noexcept
 112     {
 113         UnionType a {av};
 114         auto retval = static_cast<ScalarType> (0);
 115
 116         for (size_t i = 0; i < n; ++i)
 117             retval = static_cast<ScalarType> (retval + a.s[i]);
 118
 119         return retval;
 120     }
 121
 122     static forcedinline vSIMDType truncate (vSIMDType av) noexcept
 123     {
 124         UnionType a {av};
 125
 126         for (size_t i = 0; i < n; ++i)
 127             a.s[i] = static_cast<ScalarType> (static_cast<int> (a.s[i]));
 128
 129         return a.v;
 130     }
 131
 132     static forcedinline vSIMDType multiplyAdd (vSIMDType av, vSIMDType bv, vSIMDType cv) noexcept
 133     {
 134         UnionType a {av}, b {bv}, c {cv};
 135
 136         for (size_t i = 0; i < n; ++i)
 137             a.s[i] += b.s[i] * c.s[i];
 138
 139         return a.v;
 140     }
 141
 142     //==============================================================================
 143     static forcedinline bool allEqual (vSIMDType av, vSIMDType bv) noexcept
 144     {
 145         UnionType a {av}, b {bv};
 146
 147         for (size_t i = 0; i < n; ++i)
 148             if (a.s[i] != b.s[i])
 149                 return false;
 150
 151         return true;
 152     }
 153
 154     //==============================================================================
 155     static forcedinline vSIMDType cmplxmul (vSIMDType av, vSIMDType bv) noexcept
 156     {
 157         UnionType a {av}, b {bv}, r;
 158
 159         const int m = n >> 1;
 160         for (int i = 0; i < m; ++i)
 161         {
 162             std::complex<ScalarType> result
 163                   = std::complex<ScalarType> (a.s[i<<1], a.s[(i<<1)|1])
 164                   * std::complex<ScalarType> (b.s[i<<1], b.s[(i<<1)|1]);
 165
 166             r.s[i<<1]     = result.real();
 167             r.s[(i<<1)|1] = result.imag();
 168         }
 169
 170         return r.v;
 171     }
 172
 173     struct ScalarAdd { static forcedinline ScalarType   op (ScalarType a, ScalarType b)   noexcept { return a + b; } };
 174     struct ScalarSub { static forcedinline ScalarType   op (ScalarType a, ScalarType b)   noexcept { return a - b; } };
 175     struct ScalarMul { static forcedinline ScalarType   op (ScalarType a, ScalarType b)   noexcept { return a * b; } };
 176     struct ScalarMin { static forcedinline ScalarType   op (ScalarType a, ScalarType b)   noexcept { return jmin (a, b); } };
 177     struct ScalarMax { static forcedinline ScalarType   op (ScalarType a, ScalarType b)   noexcept { return jmax (a, b); } };
 178     struct ScalarAnd { static forcedinline MaskType     op (MaskType a,   MaskType b)     noexcept { return a & b; } };
 179     struct ScalarOr  { static forcedinline MaskType     op (MaskType a,   MaskType b)     noexcept { return a | b; } };
 180     struct ScalarXor { static forcedinline MaskType     op (MaskType a,   MaskType b)     noexcept { return a ^ b; } };
 181     struct ScalarNot { static forcedinline MaskType     op (MaskType a,   MaskType b)     noexcept { return (~a) & b; } };
 182     struct ScalarEq  { static forcedinline bool         op (ScalarType a, ScalarType b)   noexcept { return (a == b); } };
 183     struct ScalarNeq { static forcedinline bool         op (ScalarType a, ScalarType b)   noexcept { return (a != b); } };
 184     struct ScalarGt  { static forcedinline bool         op (ScalarType a, ScalarType b)   noexcept { return (a >  b); } };
 185     struct ScalarGeq { static forcedinline bool         op (ScalarType a, ScalarType b)   noexcept { return (a >= b); } };
 186
 187     // generic apply routines for operations above
 188     template <typename Op>
 189     static forcedinline vSIMDType apply (vSIMDType av, vSIMDType bv) noexcept
 190     {
 191         UnionType a {av}, b {bv};
 192
 193         for (size_t i = 0; i < n; ++i)
 194             a.s[i] = Op::op (a.s[i], b.s[i]);
 195
 196         return a.v;
 197     }
 198
 199     template <typename Op>
 200     static forcedinline vSIMDType cmp (vSIMDType av, vSIMDType bv) noexcept
 201     {
 202         UnionType a {av}, b {bv};
 203         UnionMaskType r;
 204
 205         for (size_t i = 0; i < n; ++i)
 206             r.m[i] = Op::op (a.s[i], b.s[i]) ? static_cast<MaskType> (-1) : static_cast<MaskType> (0);
 207
 208         return r.v;
 209     }
 210
 211     template <typename Op>
 212     static forcedinline vSIMDType bitapply (vSIMDType av, vSIMDType bv) noexcept
 213     {
 214         UnionMaskType a {av}, b {bv};
 215
 216         for (size_t i = 0; i < n; ++i)
 217             a.m[i] = Op::op (a.m[i], b.m[i]);
 218
 219         return a.v;
 220     }
 221
 222     static forcedinline vSIMDType expand (ScalarType s) noexcept
 223     {
 224         UnionType r;
 225
 226         for (size_t i = 0; i < n; ++i)
 227             r.s[i] = s;
 228
 229         return r.v;
 230     }
 231
 232     static forcedinline vSIMDType load (const ScalarType* a) noexcept
 233     {
 234         UnionType r;
 235
 236         for (size_t i = 0; i < n; ++i)
 237             r.s[i] = a[i];
 238
 239         return r.v;
 240     }
 241
 242     static forcedinline void store (vSIMDType av, ScalarType* dest) noexcept
 243     {
 244         UnionType a {av};
 245
 246         for (size_t i = 0; i < n; ++i)
 247             dest[i] = a.s[i];
 248     }
 249
 250     template <unsigned int shuffle_idx>
 251     static forcedinline vSIMDType shuffle (vSIMDType av) noexcept
 252     {
 253         UnionType a {av}, r;
 254
 255         // the compiler will unroll this loop and the index can
 256         // be computed at compile-time, so this will be super fast
 257         for (size_t i = 0; i < n; ++i)
 258             r.s[i] = a.s[(shuffle_idx >> (bits * i)) & mask];
 259
 260         return r.v;
 261     }
 262 };
 263
 264 } // namespace dsp
 265 } // namespace juce