Combine some duplicate code to mix each channel
[openal-soft.git] / core / converter.cpp
blob35b1f289e650f2c8fefffc58f4ed308842529631
2 #include "config.h"
4 #include "converter.h"
6 #include <algorithm>
7 #include <cassert>
8 #include <cmath>
9 #include <cstdint>
10 #include <iterator>
11 #include <limits.h>
13 #include "albit.h"
14 #include "albyte.h"
15 #include "alnumeric.h"
16 #include "fpu_ctrl.h"
18 struct CTag;
19 struct CopyTag;
22 namespace {
24 constexpr uint MaxPitch{10};
26 static_assert((BufferLineSize-1)/MaxPitch > 0, "MaxPitch is too large for BufferLineSize!");
27 static_assert((INT_MAX>>MixerFracBits)/MaxPitch > BufferLineSize,
28 "MaxPitch and/or BufferLineSize are too large for MixerFracBits!");
30 /* Base template left undefined. Should be marked =delete, but Clang 3.8.1
31 * chokes on that given the inline specializations.
33 template<DevFmtType T>
34 inline float LoadSample(DevFmtType_t<T> val) noexcept;
36 template<> inline float LoadSample<DevFmtByte>(DevFmtType_t<DevFmtByte> val) noexcept
37 { return val * (1.0f/128.0f); }
38 template<> inline float LoadSample<DevFmtShort>(DevFmtType_t<DevFmtShort> val) noexcept
39 { return val * (1.0f/32768.0f); }
40 template<> inline float LoadSample<DevFmtInt>(DevFmtType_t<DevFmtInt> val) noexcept
41 { return static_cast<float>(val) * (1.0f/2147483648.0f); }
42 template<> inline float LoadSample<DevFmtFloat>(DevFmtType_t<DevFmtFloat> val) noexcept
43 { return val; }
45 template<> inline float LoadSample<DevFmtUByte>(DevFmtType_t<DevFmtUByte> val) noexcept
46 { return LoadSample<DevFmtByte>(static_cast<int8_t>(val - 128)); }
47 template<> inline float LoadSample<DevFmtUShort>(DevFmtType_t<DevFmtUShort> val) noexcept
48 { return LoadSample<DevFmtShort>(static_cast<int16_t>(val - 32768)); }
49 template<> inline float LoadSample<DevFmtUInt>(DevFmtType_t<DevFmtUInt> val) noexcept
50 { return LoadSample<DevFmtInt>(static_cast<int32_t>(val - 2147483648u)); }
53 template<DevFmtType T>
54 inline void LoadSampleArray(float *RESTRICT dst, const void *src, const size_t srcstep,
55 const size_t samples) noexcept
57 const DevFmtType_t<T> *ssrc = static_cast<const DevFmtType_t<T>*>(src);
58 for(size_t i{0u};i < samples;i++)
59 dst[i] = LoadSample<T>(ssrc[i*srcstep]);
62 void LoadSamples(float *dst, const void *src, const size_t srcstep, const DevFmtType srctype,
63 const size_t samples) noexcept
65 #define HANDLE_FMT(T) \
66 case T: LoadSampleArray<T>(dst, src, srcstep, samples); break
67 switch(srctype)
69 HANDLE_FMT(DevFmtByte);
70 HANDLE_FMT(DevFmtUByte);
71 HANDLE_FMT(DevFmtShort);
72 HANDLE_FMT(DevFmtUShort);
73 HANDLE_FMT(DevFmtInt);
74 HANDLE_FMT(DevFmtUInt);
75 HANDLE_FMT(DevFmtFloat);
77 #undef HANDLE_FMT
81 template<DevFmtType T>
82 inline DevFmtType_t<T> StoreSample(float) noexcept;
84 template<> inline float StoreSample<DevFmtFloat>(float val) noexcept
85 { return val; }
86 template<> inline int32_t StoreSample<DevFmtInt>(float val) noexcept
87 { return fastf2i(clampf(val*2147483648.0f, -2147483648.0f, 2147483520.0f)); }
88 template<> inline int16_t StoreSample<DevFmtShort>(float val) noexcept
89 { return static_cast<int16_t>(fastf2i(clampf(val*32768.0f, -32768.0f, 32767.0f))); }
90 template<> inline int8_t StoreSample<DevFmtByte>(float val) noexcept
91 { return static_cast<int8_t>(fastf2i(clampf(val*128.0f, -128.0f, 127.0f))); }
93 /* Define unsigned output variations. */
94 template<> inline uint32_t StoreSample<DevFmtUInt>(float val) noexcept
95 { return static_cast<uint32_t>(StoreSample<DevFmtInt>(val)) + 2147483648u; }
96 template<> inline uint16_t StoreSample<DevFmtUShort>(float val) noexcept
97 { return static_cast<uint16_t>(StoreSample<DevFmtShort>(val) + 32768); }
98 template<> inline uint8_t StoreSample<DevFmtUByte>(float val) noexcept
99 { return static_cast<uint8_t>(StoreSample<DevFmtByte>(val) + 128); }
101 template<DevFmtType T>
102 inline void StoreSampleArray(void *dst, const float *RESTRICT src, const size_t dststep,
103 const size_t samples) noexcept
105 DevFmtType_t<T> *sdst = static_cast<DevFmtType_t<T>*>(dst);
106 for(size_t i{0u};i < samples;i++)
107 sdst[i*dststep] = StoreSample<T>(src[i]);
111 void StoreSamples(void *dst, const float *src, const size_t dststep, const DevFmtType dsttype,
112 const size_t samples) noexcept
114 #define HANDLE_FMT(T) \
115 case T: StoreSampleArray<T>(dst, src, dststep, samples); break
116 switch(dsttype)
118 HANDLE_FMT(DevFmtByte);
119 HANDLE_FMT(DevFmtUByte);
120 HANDLE_FMT(DevFmtShort);
121 HANDLE_FMT(DevFmtUShort);
122 HANDLE_FMT(DevFmtInt);
123 HANDLE_FMT(DevFmtUInt);
124 HANDLE_FMT(DevFmtFloat);
126 #undef HANDLE_FMT
130 template<DevFmtType T>
131 void Mono2Stereo(float *RESTRICT dst, const void *src, const size_t frames) noexcept
133 const DevFmtType_t<T> *ssrc = static_cast<const DevFmtType_t<T>*>(src);
134 for(size_t i{0u};i < frames;i++)
135 dst[i*2 + 1] = dst[i*2 + 0] = LoadSample<T>(ssrc[i]) * 0.707106781187f;
138 template<DevFmtType T>
139 void Multi2Mono(uint chanmask, const size_t step, const float scale, float *RESTRICT dst,
140 const void *src, const size_t frames) noexcept
142 const DevFmtType_t<T> *ssrc = static_cast<const DevFmtType_t<T>*>(src);
143 std::fill_n(dst, frames, 0.0f);
144 for(size_t c{0};chanmask;++c)
146 if((chanmask&1)) [[likely]]
148 for(size_t i{0u};i < frames;i++)
149 dst[i] += LoadSample<T>(ssrc[i*step + c]);
151 chanmask >>= 1;
153 for(size_t i{0u};i < frames;i++)
154 dst[i] *= scale;
157 } // namespace
159 SampleConverterPtr SampleConverter::Create(DevFmtType srcType, DevFmtType dstType, size_t numchans,
160 uint srcRate, uint dstRate, Resampler resampler)
162 if(numchans < 1 || srcRate < 1 || dstRate < 1)
163 return nullptr;
165 SampleConverterPtr converter{new(FamCount(numchans)) SampleConverter{numchans}};
166 converter->mSrcType = srcType;
167 converter->mDstType = dstType;
168 converter->mSrcTypeSize = BytesFromDevFmt(srcType);
169 converter->mDstTypeSize = BytesFromDevFmt(dstType);
171 converter->mSrcPrepCount = MaxResamplerPadding;
172 converter->mFracOffset = 0;
173 for(auto &chan : converter->mChan)
175 const al::span<float> buffer{chan.PrevSamples};
176 std::fill(buffer.begin(), buffer.end(), 0.0f);
179 /* Have to set the mixer FPU mode since that's what the resampler code expects. */
180 FPUCtl mixer_mode{};
181 auto step = static_cast<uint>(
182 mind(srcRate*double{MixerFracOne}/dstRate + 0.5, MaxPitch*MixerFracOne));
183 converter->mIncrement = maxu(step, 1);
184 if(converter->mIncrement == MixerFracOne)
185 converter->mResample = Resample_<CopyTag,CTag>;
186 else
187 converter->mResample = PrepareResampler(resampler, converter->mIncrement,
188 &converter->mState);
190 return converter;
193 uint SampleConverter::availableOut(uint srcframes) const
195 if(srcframes < 1)
197 /* No output samples if there's no input samples. */
198 return 0;
201 const uint prepcount{mSrcPrepCount};
202 if(prepcount < MaxResamplerPadding && MaxResamplerPadding - prepcount >= srcframes)
204 /* Not enough input samples to generate an output sample. */
205 return 0;
208 uint64_t DataSize64{prepcount};
209 DataSize64 += srcframes;
210 DataSize64 -= MaxResamplerPadding;
211 DataSize64 <<= MixerFracBits;
212 DataSize64 -= mFracOffset;
214 /* If we have a full prep, we can generate at least one sample. */
215 return static_cast<uint>(clampu64((DataSize64 + mIncrement-1)/mIncrement, 1,
216 std::numeric_limits<int>::max()));
219 uint SampleConverter::convert(const void **src, uint *srcframes, void *dst, uint dstframes)
221 const uint SrcFrameSize{static_cast<uint>(mChan.size()) * mSrcTypeSize};
222 const uint DstFrameSize{static_cast<uint>(mChan.size()) * mDstTypeSize};
223 const uint increment{mIncrement};
224 auto SamplesIn = static_cast<const al::byte*>(*src);
225 uint NumSrcSamples{*srcframes};
227 FPUCtl mixer_mode{};
228 uint pos{0};
229 while(pos < dstframes && NumSrcSamples > 0)
231 const uint prepcount{mSrcPrepCount};
232 const uint readable{minu(NumSrcSamples, BufferLineSize - prepcount)};
234 if(prepcount < MaxResamplerPadding && MaxResamplerPadding-prepcount >= readable)
236 /* Not enough input samples to generate an output sample. Store
237 * what we're given for later.
239 for(size_t chan{0u};chan < mChan.size();chan++)
240 LoadSamples(&mChan[chan].PrevSamples[prepcount], SamplesIn + mSrcTypeSize*chan,
241 mChan.size(), mSrcType, readable);
243 mSrcPrepCount = prepcount + readable;
244 NumSrcSamples = 0;
245 break;
248 float *RESTRICT SrcData{mSrcSamples};
249 float *RESTRICT DstData{mDstSamples};
250 uint DataPosFrac{mFracOffset};
251 uint64_t DataSize64{prepcount};
252 DataSize64 += readable;
253 DataSize64 -= MaxResamplerPadding;
254 DataSize64 <<= MixerFracBits;
255 DataSize64 -= DataPosFrac;
257 /* If we have a full prep, we can generate at least one sample. */
258 auto DstSize = static_cast<uint>(
259 clampu64((DataSize64 + increment-1)/increment, 1, BufferLineSize));
260 DstSize = minu(DstSize, dstframes-pos);
262 const uint DataPosEnd{DstSize*increment + DataPosFrac};
263 const uint SrcDataEnd{DataPosEnd>>MixerFracBits};
265 assert(prepcount+readable >= SrcDataEnd);
266 const uint nextprep{minu(prepcount + readable - SrcDataEnd, MaxResamplerPadding)};
268 for(size_t chan{0u};chan < mChan.size();chan++)
270 const al::byte *SrcSamples{SamplesIn + mSrcTypeSize*chan};
271 al::byte *DstSamples = static_cast<al::byte*>(dst) + mDstTypeSize*chan;
273 /* Load the previous samples into the source data first, then the
274 * new samples from the input buffer.
276 std::copy_n(mChan[chan].PrevSamples, prepcount, SrcData);
277 LoadSamples(SrcData + prepcount, SrcSamples, mChan.size(), mSrcType, readable);
279 /* Store as many prep samples for next time as possible, given the
280 * number of output samples being generated.
282 std::copy_n(SrcData+SrcDataEnd, nextprep, mChan[chan].PrevSamples);
283 std::fill(std::begin(mChan[chan].PrevSamples)+nextprep,
284 std::end(mChan[chan].PrevSamples), 0.0f);
286 /* Now resample, and store the result in the output buffer. */
287 const float *ResampledData{mResample(&mState, SrcData+(MaxResamplerPadding>>1),
288 DataPosFrac, increment, {DstData, DstSize})};
290 StoreSamples(DstSamples, ResampledData, mChan.size(), mDstType, DstSize);
293 /* Update the number of prep samples still available, as well as the
294 * fractional offset.
296 mSrcPrepCount = nextprep;
297 mFracOffset = DataPosEnd & MixerFracMask;
299 /* Update the src and dst pointers in case there's still more to do. */
300 const uint srcread{minu(NumSrcSamples, SrcDataEnd + mSrcPrepCount - prepcount)};
301 SamplesIn += SrcFrameSize*srcread;
302 NumSrcSamples -= srcread;
304 dst = static_cast<al::byte*>(dst) + DstFrameSize*DstSize;
305 pos += DstSize;
308 *src = SamplesIn;
309 *srcframes = NumSrcSamples;
311 return pos;
315 void ChannelConverter::convert(const void *src, float *dst, uint frames) const
317 if(mDstChans == DevFmtMono)
319 const float scale{std::sqrt(1.0f / static_cast<float>(al::popcount(mChanMask)))};
320 switch(mSrcType)
322 #define HANDLE_FMT(T) case T: Multi2Mono<T>(mChanMask, mSrcStep, scale, dst, src, frames); break
323 HANDLE_FMT(DevFmtByte);
324 HANDLE_FMT(DevFmtUByte);
325 HANDLE_FMT(DevFmtShort);
326 HANDLE_FMT(DevFmtUShort);
327 HANDLE_FMT(DevFmtInt);
328 HANDLE_FMT(DevFmtUInt);
329 HANDLE_FMT(DevFmtFloat);
330 #undef HANDLE_FMT
333 else if(mChanMask == 0x1 && mDstChans == DevFmtStereo)
335 switch(mSrcType)
337 #define HANDLE_FMT(T) case T: Mono2Stereo<T>(dst, src, frames); break
338 HANDLE_FMT(DevFmtByte);
339 HANDLE_FMT(DevFmtUByte);
340 HANDLE_FMT(DevFmtShort);
341 HANDLE_FMT(DevFmtUShort);
342 HANDLE_FMT(DevFmtInt);
343 HANDLE_FMT(DevFmtUInt);
344 HANDLE_FMT(DevFmtFloat);
345 #undef HANDLE_FMT