Use 0/1 defines for backend and simd macros
[openal-soft.git] / core / converter.cpp
blob97102536f1296863b90cf81b56767a7345a4e914
2 #include "config.h"
4 #include "converter.h"
6 #include <algorithm>
7 #include <cassert>
8 #include <cmath>
9 #include <cstddef>
10 #include <cstdint>
11 #include <iterator>
12 #include <climits>
14 #include "albit.h"
15 #include "alnumeric.h"
16 #include "fpu_ctrl.h"
19 namespace {
21 constexpr uint MaxPitch{10};
23 static_assert((BufferLineSize-1)/MaxPitch > 0, "MaxPitch is too large for BufferLineSize!");
24 static_assert((INT_MAX>>MixerFracBits)/MaxPitch > BufferLineSize,
25 "MaxPitch and/or BufferLineSize are too large for MixerFracBits!");
27 template<DevFmtType T>
28 constexpr float LoadSample(DevFmtType_t<T> val) noexcept = delete;
30 template<> constexpr float LoadSample<DevFmtByte>(DevFmtType_t<DevFmtByte> val) noexcept
31 { return float(val) * (1.0f/128.0f); }
32 template<> constexpr float LoadSample<DevFmtShort>(DevFmtType_t<DevFmtShort> val) noexcept
33 { return float(val) * (1.0f/32768.0f); }
34 template<> constexpr float LoadSample<DevFmtInt>(DevFmtType_t<DevFmtInt> val) noexcept
35 { return static_cast<float>(val) * (1.0f/2147483648.0f); }
36 template<> constexpr float LoadSample<DevFmtFloat>(DevFmtType_t<DevFmtFloat> val) noexcept
37 { return val; }
39 template<> constexpr float LoadSample<DevFmtUByte>(DevFmtType_t<DevFmtUByte> val) noexcept
40 { return LoadSample<DevFmtByte>(static_cast<int8_t>(val - 128)); }
41 template<> constexpr float LoadSample<DevFmtUShort>(DevFmtType_t<DevFmtUShort> val) noexcept
42 { return LoadSample<DevFmtShort>(static_cast<int16_t>(val - 32768)); }
43 template<> constexpr float LoadSample<DevFmtUInt>(DevFmtType_t<DevFmtUInt> val) noexcept
44 { return LoadSample<DevFmtInt>(static_cast<int32_t>(val - 2147483648u)); }
47 template<DevFmtType T>
48 inline void LoadSampleArray(const al::span<float> dst, const void *src, const size_t channel,
49 const size_t srcstep) noexcept
51 assert(channel < srcstep);
52 const auto srcspan = al::span{static_cast<const DevFmtType_t<T>*>(src), dst.size()*srcstep};
53 auto ssrc = srcspan.cbegin();
54 std::generate(dst.begin(), dst.end(), [&ssrc,channel,srcstep]
56 const float ret{LoadSample<T>(ssrc[channel])};
57 ssrc += ptrdiff_t(srcstep);
58 return ret;
59 });
62 void LoadSamples(const al::span<float> dst, const void *src, const size_t channel,
63 const size_t srcstep, const DevFmtType srctype) noexcept
65 #define HANDLE_FMT(T) \
66 case T: LoadSampleArray<T>(dst, src, channel, srcstep); break
67 switch(srctype)
69 HANDLE_FMT(DevFmtByte);
70 HANDLE_FMT(DevFmtUByte);
71 HANDLE_FMT(DevFmtShort);
72 HANDLE_FMT(DevFmtUShort);
73 HANDLE_FMT(DevFmtInt);
74 HANDLE_FMT(DevFmtUInt);
75 HANDLE_FMT(DevFmtFloat);
77 #undef HANDLE_FMT
81 template<DevFmtType T>
82 inline DevFmtType_t<T> StoreSample(float) noexcept;
84 template<> inline float StoreSample<DevFmtFloat>(float val) noexcept
85 { return val; }
86 template<> inline int32_t StoreSample<DevFmtInt>(float val) noexcept
87 { return fastf2i(std::clamp(val*2147483648.0f, -2147483648.0f, 2147483520.0f)); }
88 template<> inline int16_t StoreSample<DevFmtShort>(float val) noexcept
89 { return static_cast<int16_t>(fastf2i(std::clamp(val*32768.0f, -32768.0f, 32767.0f))); }
90 template<> inline int8_t StoreSample<DevFmtByte>(float val) noexcept
91 { return static_cast<int8_t>(fastf2i(std::clamp(val*128.0f, -128.0f, 127.0f))); }
93 /* Define unsigned output variations. */
94 template<> inline uint32_t StoreSample<DevFmtUInt>(float val) noexcept
95 { return static_cast<uint32_t>(StoreSample<DevFmtInt>(val)) + 2147483648u; }
96 template<> inline uint16_t StoreSample<DevFmtUShort>(float val) noexcept
97 { return static_cast<uint16_t>(StoreSample<DevFmtShort>(val) + 32768); }
98 template<> inline uint8_t StoreSample<DevFmtUByte>(float val) noexcept
99 { return static_cast<uint8_t>(StoreSample<DevFmtByte>(val) + 128); }
101 template<DevFmtType T>
102 inline void StoreSampleArray(void *dst, const al::span<const float> src, const size_t channel,
103 const size_t dststep) noexcept
105 assert(channel < dststep);
106 const auto dstspan = al::span{static_cast<DevFmtType_t<T>*>(dst), src.size()*dststep};
107 auto sdst = dstspan.begin();
108 std::for_each(src.cbegin(), src.cend(), [&sdst,channel,dststep](const float in)
110 sdst[channel] = StoreSample<T>(in);
111 sdst += ptrdiff_t(dststep);
116 void StoreSamples(void *dst, const al::span<const float> src, const size_t channel,
117 const size_t dststep, const DevFmtType dsttype) noexcept
119 #define HANDLE_FMT(T) \
120 case T: StoreSampleArray<T>(dst, src, channel, dststep); break
121 switch(dsttype)
123 HANDLE_FMT(DevFmtByte);
124 HANDLE_FMT(DevFmtUByte);
125 HANDLE_FMT(DevFmtShort);
126 HANDLE_FMT(DevFmtUShort);
127 HANDLE_FMT(DevFmtInt);
128 HANDLE_FMT(DevFmtUInt);
129 HANDLE_FMT(DevFmtFloat);
131 #undef HANDLE_FMT
135 template<DevFmtType T>
136 void Mono2Stereo(const al::span<float> dst, const void *src) noexcept
138 const auto srcspan = al::span{static_cast<const DevFmtType_t<T>*>(src), dst.size()>>1};
139 auto sdst = dst.begin();
140 std::for_each(srcspan.cbegin(), srcspan.cend(), [&sdst](const auto in)
141 { sdst = std::fill_n(sdst, 2, LoadSample<T>(in)*0.707106781187f); });
144 template<DevFmtType T>
145 void Multi2Mono(uint chanmask, const size_t step, const float scale, const al::span<float> dst,
146 const void *src) noexcept
148 const auto srcspan = al::span{static_cast<const DevFmtType_t<T>*>(src), step*dst.size()};
149 std::fill_n(dst.begin(), dst.size(), 0.0f);
150 for(size_t c{0};chanmask;++c)
152 if((chanmask&1)) LIKELY
154 auto ssrc = srcspan.cbegin();
155 std::for_each(dst.begin(), dst.end(), [&ssrc,step,c](float &sample)
157 const float s{LoadSample<T>(ssrc[c])};
158 ssrc += ptrdiff_t(step);
159 sample += s;
162 chanmask >>= 1;
164 std::for_each(dst.begin(), dst.end(), [scale](float &sample) noexcept { sample *= scale; });
167 } // namespace
169 SampleConverterPtr SampleConverter::Create(DevFmtType srcType, DevFmtType dstType, size_t numchans,
170 uint srcRate, uint dstRate, Resampler resampler)
172 if(numchans < 1 || srcRate < 1 || dstRate < 1)
173 return nullptr;
175 SampleConverterPtr converter{new(FamCount(numchans)) SampleConverter{numchans}};
176 converter->mSrcType = srcType;
177 converter->mDstType = dstType;
178 converter->mSrcTypeSize = BytesFromDevFmt(srcType);
179 converter->mDstTypeSize = BytesFromDevFmt(dstType);
181 converter->mSrcPrepCount = MaxResamplerPadding;
182 converter->mFracOffset = 0;
183 for(auto &chan : converter->mChan)
184 chan.PrevSamples.fill(0.0f);
186 /* Have to set the mixer FPU mode since that's what the resampler code expects. */
187 FPUCtl mixer_mode{};
188 const auto step = std::min(std::round(srcRate*double{MixerFracOne}/dstRate),
189 MaxPitch*double{MixerFracOne});
190 converter->mIncrement = std::max(static_cast<uint>(step), 1u);
191 if(converter->mIncrement == MixerFracOne)
193 converter->mResample = [](const InterpState*, const al::span<const float> src, uint,
194 const uint, const al::span<float> dst)
195 { std::copy_n(src.begin()+MaxResamplerEdge, dst.size(), dst.begin()); };
197 else
198 converter->mResample = PrepareResampler(resampler, converter->mIncrement,
199 &converter->mState);
201 return converter;
204 uint SampleConverter::availableOut(uint srcframes) const
206 if(srcframes < 1)
208 /* No output samples if there's no input samples. */
209 return 0;
212 const uint prepcount{mSrcPrepCount};
213 if(prepcount < MaxResamplerPadding && MaxResamplerPadding - prepcount >= srcframes)
215 /* Not enough input samples to generate an output sample. */
216 return 0;
219 uint64_t DataSize64{prepcount};
220 DataSize64 += srcframes;
221 DataSize64 -= MaxResamplerPadding;
222 DataSize64 <<= MixerFracBits;
223 DataSize64 -= mFracOffset;
225 /* If we have a full prep, we can generate at least one sample. */
226 return static_cast<uint>(std::clamp((DataSize64 + mIncrement-1)/mIncrement, 1_u64,
227 uint64_t{std::numeric_limits<int>::max()}));
230 uint SampleConverter::convert(const void **src, uint *srcframes, void *dst, uint dstframes)
232 const size_t SrcFrameSize{mChan.size() * mSrcTypeSize};
233 const size_t DstFrameSize{mChan.size() * mDstTypeSize};
234 const uint increment{mIncrement};
235 uint NumSrcSamples{*srcframes};
236 auto SamplesIn = al::span{static_cast<const std::byte*>(*src), NumSrcSamples*SrcFrameSize};
237 auto SamplesOut = al::span{static_cast<std::byte*>(dst), dstframes*DstFrameSize};
239 FPUCtl mixer_mode{};
240 uint pos{0};
241 while(pos < dstframes && NumSrcSamples > 0)
243 const uint prepcount{mSrcPrepCount};
244 const uint readable{std::min(NumSrcSamples, uint{BufferLineSize} - prepcount)};
246 if(prepcount < MaxResamplerPadding && MaxResamplerPadding-prepcount >= readable)
248 /* Not enough input samples to generate an output sample. Store
249 * what we're given for later.
251 for(size_t chan{0u};chan < mChan.size();chan++)
252 LoadSamples(al::span{mChan[chan].PrevSamples}.subspan(prepcount, readable),
253 SamplesIn.data(), chan, mChan.size(), mSrcType);
255 mSrcPrepCount = prepcount + readable;
256 NumSrcSamples = 0;
257 break;
260 const auto SrcData = al::span<float>{mSrcSamples};
261 const auto DstData = al::span<float>{mDstSamples};
262 uint DataPosFrac{mFracOffset};
263 uint64_t DataSize64{prepcount};
264 DataSize64 += readable;
265 DataSize64 -= MaxResamplerPadding;
266 DataSize64 <<= MixerFracBits;
267 DataSize64 -= DataPosFrac;
269 /* If we have a full prep, we can generate at least one sample. */
270 auto DstSize = static_cast<uint>(std::clamp((DataSize64 + increment-1)/increment, 1_u64,
271 uint64_t{BufferLineSize}));
272 DstSize = std::min(DstSize, dstframes-pos);
274 const uint DataPosEnd{DstSize*increment + DataPosFrac};
275 const uint SrcDataEnd{DataPosEnd>>MixerFracBits};
277 assert(prepcount+readable >= SrcDataEnd);
278 const uint nextprep{std::min(prepcount+readable-SrcDataEnd, MaxResamplerPadding)};
280 for(size_t chan{0u};chan < mChan.size();chan++)
282 /* Load the previous samples into the source data first, then the
283 * new samples from the input buffer.
285 std::copy_n(mChan[chan].PrevSamples.cbegin(), prepcount, SrcData.begin());
286 LoadSamples(SrcData.subspan(prepcount, readable), SamplesIn.data(), chan, mChan.size(),
287 mSrcType);
289 /* Store as many prep samples for next time as possible, given the
290 * number of output samples being generated.
292 auto previter = std::copy_n(SrcData.begin()+ptrdiff_t(SrcDataEnd), nextprep,
293 mChan[chan].PrevSamples.begin());
294 std::fill(previter, mChan[chan].PrevSamples.end(), 0.0f);
296 /* Now resample, and store the result in the output buffer. */
297 mResample(&mState, SrcData, DataPosFrac, increment, DstData.first(DstSize));
299 StoreSamples(SamplesOut.data(), DstData.first(DstSize), chan, mChan.size(), mDstType);
302 /* Update the number of prep samples still available, as well as the
303 * fractional offset.
305 mSrcPrepCount = nextprep;
306 mFracOffset = DataPosEnd & MixerFracMask;
308 /* Update the src and dst pointers in case there's still more to do. */
309 const uint srcread{std::min(NumSrcSamples, SrcDataEnd + mSrcPrepCount - prepcount)};
310 SamplesIn = SamplesIn.subspan(SrcFrameSize*srcread);
311 NumSrcSamples -= srcread;
313 SamplesOut = SamplesOut.subspan(DstFrameSize*DstSize);
314 pos += DstSize;
317 *src = SamplesIn.data();
318 *srcframes = NumSrcSamples;
320 return pos;
323 uint SampleConverter::convertPlanar(const void **src, uint *srcframes, void *const*dst, uint dstframes)
325 const auto srcs = al::span{src, mChan.size()};
326 const auto dsts = al::span{dst, mChan.size()};
327 const uint increment{mIncrement};
328 uint NumSrcSamples{*srcframes};
330 FPUCtl mixer_mode{};
331 uint pos{0};
332 while(pos < dstframes && NumSrcSamples > 0)
334 const uint prepcount{mSrcPrepCount};
335 const uint readable{std::min(NumSrcSamples, uint{BufferLineSize} - prepcount)};
337 if(prepcount < MaxResamplerPadding && MaxResamplerPadding-prepcount >= readable)
339 /* Not enough input samples to generate an output sample. Store
340 * what we're given for later.
342 for(size_t chan{0u};chan < mChan.size();chan++)
344 auto samples = al::span{static_cast<const std::byte*>(srcs[chan]),
345 NumSrcSamples*size_t{mSrcTypeSize}};
346 LoadSamples(al::span{mChan[chan].PrevSamples}.subspan(prepcount, readable),
347 samples.data(), 0, 1, mSrcType);
348 srcs[chan] = samples.subspan(size_t{mSrcTypeSize}*readable).data();
351 mSrcPrepCount = prepcount + readable;
352 NumSrcSamples = 0;
353 break;
356 const auto SrcData = al::span{mSrcSamples};
357 const auto DstData = al::span{mDstSamples};
358 uint DataPosFrac{mFracOffset};
359 uint64_t DataSize64{prepcount};
360 DataSize64 += readable;
361 DataSize64 -= MaxResamplerPadding;
362 DataSize64 <<= MixerFracBits;
363 DataSize64 -= DataPosFrac;
365 /* If we have a full prep, we can generate at least one sample. */
366 auto DstSize = static_cast<uint>(std::clamp((DataSize64 + increment-1)/increment, 1_u64,
367 uint64_t{BufferLineSize}));
368 DstSize = std::min(DstSize, dstframes-pos);
370 const uint DataPosEnd{DstSize*increment + DataPosFrac};
371 const uint SrcDataEnd{DataPosEnd>>MixerFracBits};
373 assert(prepcount+readable >= SrcDataEnd);
374 const uint nextprep{std::min(prepcount+readable-SrcDataEnd, MaxResamplerPadding)};
376 for(size_t chan{0u};chan < mChan.size();chan++)
378 /* Load the previous samples into the source data first, then the
379 * new samples from the input buffer.
381 auto srciter = std::copy_n(mChan[chan].PrevSamples.cbegin(),prepcount,SrcData.begin());
382 LoadSamples({srciter, readable}, srcs[chan], 0, 1, mSrcType);
384 /* Store as many prep samples for next time as possible, given the
385 * number of output samples being generated.
387 auto previter = std::copy_n(SrcData.begin()+ptrdiff_t(SrcDataEnd), nextprep,
388 mChan[chan].PrevSamples.begin());
389 std::fill(previter, mChan[chan].PrevSamples.end(), 0.0f);
391 /* Now resample, and store the result in the output buffer. */
392 mResample(&mState, SrcData, DataPosFrac, increment, DstData.first(DstSize));
394 auto DstSamples = al::span{static_cast<std::byte*>(dsts[chan]),
395 size_t{mDstTypeSize}*dstframes}.subspan(pos*size_t{mDstTypeSize});
396 StoreSamples(DstSamples.data(), DstData.first(DstSize), 0, 1, mDstType);
399 /* Update the number of prep samples still available, as well as the
400 * fractional offset.
402 mSrcPrepCount = nextprep;
403 mFracOffset = DataPosEnd & MixerFracMask;
405 /* Update the src and dst pointers in case there's still more to do. */
406 const uint srcread{std::min(NumSrcSamples, SrcDataEnd + mSrcPrepCount - prepcount)};
407 std::for_each(srcs.begin(), srcs.end(), [this,NumSrcSamples,srcread](const void *&srcref)
409 auto srcspan = al::span{static_cast<const std::byte*>(srcref),
410 size_t{mSrcTypeSize}*NumSrcSamples};
411 srcref = srcspan.subspan(size_t{mSrcTypeSize}*srcread).data();
413 NumSrcSamples -= srcread;
415 pos += DstSize;
418 *srcframes = NumSrcSamples;
420 return pos;
424 void ChannelConverter::convert(const void *src, float *dst, uint frames) const
426 if(mDstChans == DevFmtMono)
428 const float scale{std::sqrt(1.0f / static_cast<float>(al::popcount(mChanMask)))};
429 switch(mSrcType)
431 #define HANDLE_FMT(T) case T: Multi2Mono<T>(mChanMask, mSrcStep, scale, {dst, frames}, src); break
432 HANDLE_FMT(DevFmtByte);
433 HANDLE_FMT(DevFmtUByte);
434 HANDLE_FMT(DevFmtShort);
435 HANDLE_FMT(DevFmtUShort);
436 HANDLE_FMT(DevFmtInt);
437 HANDLE_FMT(DevFmtUInt);
438 HANDLE_FMT(DevFmtFloat);
439 #undef HANDLE_FMT
442 else if(mChanMask == 0x1 && mDstChans == DevFmtStereo)
444 switch(mSrcType)
446 #define HANDLE_FMT(T) case T: Mono2Stereo<T>({dst, frames*2_uz}, src); break
447 HANDLE_FMT(DevFmtByte);
448 HANDLE_FMT(DevFmtUByte);
449 HANDLE_FMT(DevFmtShort);
450 HANDLE_FMT(DevFmtUShort);
451 HANDLE_FMT(DevFmtInt);
452 HANDLE_FMT(DevFmtUInt);
453 HANDLE_FMT(DevFmtFloat);
454 #undef HANDLE_FMT