Initial commit.
[libsalac.git] / src / lib / alac / codec / matrix_enc.c
blobe19433059ca65cfcbc6ee2f9d4d08d096b3431fd
1 /*
2 * Copyright (c) 2011 Apple Inc. All rights reserved.
4 * @APPLE_APACHE_LICENSE_HEADER_START@
5 *
6 * Licensed under the Apache License, Version 2.0 (the "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
18 * @APPLE_APACHE_LICENSE_HEADER_END@
22 File: matrix_enc.c
24 Contains: ALAC mixing/matrixing encode routines.
26 Copyright: (c) 2004-2011 Apple, Inc.
29 #include "matrixlib.h"
30 #include "ALACAudioTypes.h"
// up to 24-bit "offset" macros for the individual bytes of a 20/24-bit word
// LBYTE/MBYTE/HBYTE are byte offsets (0..2) of the least, middle, and most
// significant byte within a packed 3-byte sample, selected by target byte order
#if TARGET_RT_BIG_ENDIAN
#define LBYTE 2
#define MBYTE 1
#define HBYTE 0
#else
#define LBYTE 0
#define MBYTE 1
#define HBYTE 2
#endif
/*
    There is no plain middle-side option; instead there are various mixing
    modes including middle-side, each lossless, as embodied in the mix()
    and unmix() functions.  These functions exploit a generalized middle-side
    transformation:

        u := [(rL + (m-r)R)/m];
        v := L - R;

    where [ ] denotes integer floor.  The (lossless) inverse is

        L = u + v - [rv/m];
        R = L - v;
*/
58 // 16-bit routines
/*
	Mix a buffer of interleaved 16-bit stereo samples into the u/v predictor
	buffers.  When mixres is non-zero the generalized middle-side transform
	u = floor((r*L + (m-r)*R) / m), v = L - R is applied (m = 1 << mixbits,
	r = mixres); otherwise the two channels are simply de-interleaved.
*/
void mix16( int16_t * in, uint32_t stride, int32_t * u, int32_t * v, int32_t numSamples, int32_t mixbits, int32_t mixres )
{
	int16_t *	src = in;
	int32_t		index;

	if ( mixres != 0 )
	{
		/* matrixed stereo */
		int32_t		weightR = (1 << mixbits) - mixres;

		for ( index = 0; index < numSamples; index++ )
		{
			int32_t		left  = (int32_t) src[0];
			int32_t		right = (int32_t) src[1];

			src += stride;
			u[index] = (mixres * left + weightR * right) >> mixbits;
			v[index] = left - right;
		}
	}
	else
	{
		/* conventional separated stereo: straight de-interleave */
		for ( index = 0; index < numSamples; index++ )
		{
			u[index] = (int32_t) src[0];
			v[index] = (int32_t) src[1];
			src += stride;
		}
	}
}
95 // 20-bit routines
96 // - the 20 bits of data are left-justified in 3 bytes of storage but right-aligned for input/output predictor buffers
// fall back to little-endian byte offsets when compiled stand-alone
// (no effect in the full file, where the macros are already defined)
#ifndef LBYTE
	#define LBYTE 0
	#define MBYTE 1
	#define HBYTE 2
#endif

/*
	Mix interleaved 20-bit stereo samples (left-justified in 3-byte fields)
	into the right-aligned u/v predictor buffers.  When mixres is non-zero
	the generalized middle-side transform is applied; otherwise the channels
	are de-interleaved.

	FIX: the 24-bit container is assembled and left-shifted in uint32_t, not
	int32_t -- the original `(l << 8)` left-shifted values as large as
	0xFFFFFF into the sign bit of a signed int, which is undefined behavior.
	The uint32_t -> int32_t cast relies on two's-complement wraparound
	(implementation-defined but universal), and the arithmetic right shift
	then sign-extends the 20-bit payload exactly as before.
*/
void mix20( uint8_t * in, uint32_t stride, int32_t * u, int32_t * v, int32_t numSamples, int32_t mixbits, int32_t mixres )
{
	int32_t		l, r;
	uint8_t *	ip = in;
	int32_t		j;
	uint32_t	raw;

	if ( mixres != 0 )
	{
		/* matrixed stereo */
		int32_t		mod = 1 << mixbits;
		int32_t		m2 = mod - mixres;

		for ( j = 0; j < numSamples; j++ )
		{
			raw = ((uint32_t)ip[HBYTE] << 16) | ((uint32_t)ip[MBYTE] << 8) | (uint32_t)ip[LBYTE];
			l = (int32_t)(raw << 8) >> 12;
			ip += 3;

			raw = ((uint32_t)ip[HBYTE] << 16) | ((uint32_t)ip[MBYTE] << 8) | (uint32_t)ip[LBYTE];
			r = (int32_t)(raw << 8) >> 12;
			ip += (stride - 1) * 3;

			u[j] = (mixres * l + m2 * r) >> mixbits;
			v[j] = l - r;
		}
	}
	else
	{
		/* conventional separated stereo */
		for ( j = 0; j < numSamples; j++ )
		{
			raw = ((uint32_t)ip[HBYTE] << 16) | ((uint32_t)ip[MBYTE] << 8) | (uint32_t)ip[LBYTE];
			u[j] = (int32_t)(raw << 8) >> 12;
			ip += 3;

			raw = ((uint32_t)ip[HBYTE] << 16) | ((uint32_t)ip[MBYTE] << 8) | (uint32_t)ip[LBYTE];
			v[j] = (int32_t)(raw << 8) >> 12;
			ip += (stride - 1) * 3;
		}
	}
}
140 // 24-bit routines
141 // - the 24 bits of data are right-justified in the input/output predictor buffers
// fall back to little-endian byte offsets when compiled stand-alone
// (no effect in the full file, where the macros are already defined)
#ifndef LBYTE
	#define LBYTE 0
	#define MBYTE 1
	#define HBYTE 2
#endif

/*
	Mix interleaved 24-bit stereo samples (3 bytes each, right-justified in
	the predictor buffers) into u/v.  When bytesShifted is non-zero, the low
	(bytesShifted * 8) bits of every sample are stashed in shiftUV -- two
	uint16_t entries per frame -- so the decoder can restore them losslessly.

	FIX: each 24-bit container is assembled and left-shifted in uint32_t, not
	int32_t -- the original `(l << 8)` left-shifted values as large as
	0xFFFFFF into the sign bit of a signed int, which is undefined behavior.
	The uint32_t -> int32_t cast relies on two's-complement wraparound
	(implementation-defined but universal); the arithmetic right shift by 8
	then sign-extends the 24-bit payload exactly as before.
*/
void mix24( uint8_t * in, uint32_t stride, int32_t * u, int32_t * v, int32_t numSamples,
			int32_t mixbits, int32_t mixres, uint16_t * shiftUV, int32_t bytesShifted )
{
	int32_t		l, r;
	uint8_t *	ip = in;
	int32_t		shift = bytesShifted * 8;
	uint32_t	mask  = (1ul << shift) - 1;
	int32_t		j, k;
	uint32_t	raw;

	if ( mixres != 0 )
	{
		/* matrixed stereo */
		int32_t		mod = 1 << mixbits;
		int32_t		m2 = mod - mixres;

		if ( bytesShifted != 0 )
		{
			for ( j = 0, k = 0; j < numSamples; j++, k += 2 )
			{
				raw = ((uint32_t)ip[HBYTE] << 16) | ((uint32_t)ip[MBYTE] << 8) | (uint32_t)ip[LBYTE];
				l = (int32_t)(raw << 8) >> 8;
				ip += 3;

				raw = ((uint32_t)ip[HBYTE] << 16) | ((uint32_t)ip[MBYTE] << 8) | (uint32_t)ip[LBYTE];
				r = (int32_t)(raw << 8) >> 8;
				ip += (stride - 1) * 3;

				/* save the bits that are about to be shifted away */
				shiftUV[k + 0] = (uint16_t)(l & mask);
				shiftUV[k + 1] = (uint16_t)(r & mask);

				l >>= shift;
				r >>= shift;

				u[j] = (mixres * l + m2 * r) >> mixbits;
				v[j] = l - r;
			}
		}
		else
		{
			for ( j = 0; j < numSamples; j++ )
			{
				raw = ((uint32_t)ip[HBYTE] << 16) | ((uint32_t)ip[MBYTE] << 8) | (uint32_t)ip[LBYTE];
				l = (int32_t)(raw << 8) >> 8;
				ip += 3;

				raw = ((uint32_t)ip[HBYTE] << 16) | ((uint32_t)ip[MBYTE] << 8) | (uint32_t)ip[LBYTE];
				r = (int32_t)(raw << 8) >> 8;
				ip += (stride - 1) * 3;

				u[j] = (mixres * l + m2 * r) >> mixbits;
				v[j] = l - r;
			}
		}
	}
	else
	{
		/* conventional separated stereo */
		if ( bytesShifted != 0 )
		{
			for ( j = 0, k = 0; j < numSamples; j++, k += 2 )
			{
				raw = ((uint32_t)ip[HBYTE] << 16) | ((uint32_t)ip[MBYTE] << 8) | (uint32_t)ip[LBYTE];
				l = (int32_t)(raw << 8) >> 8;
				ip += 3;

				raw = ((uint32_t)ip[HBYTE] << 16) | ((uint32_t)ip[MBYTE] << 8) | (uint32_t)ip[LBYTE];
				r = (int32_t)(raw << 8) >> 8;
				ip += (stride - 1) * 3;

				/* save the bits that are about to be shifted away */
				shiftUV[k + 0] = (uint16_t)(l & mask);
				shiftUV[k + 1] = (uint16_t)(r & mask);

				l >>= shift;
				r >>= shift;

				u[j] = l;
				v[j] = r;
			}
		}
		else
		{
			for ( j = 0; j < numSamples; j++ )
			{
				raw = ((uint32_t)ip[HBYTE] << 16) | ((uint32_t)ip[MBYTE] << 8) | (uint32_t)ip[LBYTE];
				u[j] = (int32_t)(raw << 8) >> 8;
				ip += 3;

				raw = ((uint32_t)ip[HBYTE] << 16) | ((uint32_t)ip[MBYTE] << 8) | (uint32_t)ip[LBYTE];
				v[j] = (int32_t)(raw << 8) >> 8;
				ip += (stride - 1) * 3;
			}
		}
	}
}
238 // 32-bit routines
239 // - note that these really expect the internal data width to be < 32 but the arrays are 32-bit
240 // - otherwise, the calculations might overflow into the 33rd bit and be lost
241 // - therefore, these routines deal with the specified "unused lower" bytes in the "shift" buffers
/*
	Mix a buffer of interleaved stereo samples held in 32-bit containers into
	the u/v predictor buffers.  The internal data width is expected to be
	less than 32 bits; the low (bytesShifted * 8) bits of each sample are
	stashed in shiftUV (two uint16_t entries per frame) so they can be
	restored losslessly on decode.
*/
void mix32( int32_t * in, uint32_t stride, int32_t * u, int32_t * v, int32_t numSamples,
			int32_t mixbits, int32_t mixres, uint16_t * shiftUV, int32_t bytesShifted )
{
	int32_t *	src   = in;
	int32_t		shift = bytesShifted * 8;
	uint32_t	mask  = (1ul << shift) - 1;
	int32_t		left, right;
	int32_t		j, k;

	if ( mixres != 0 )
	{
		/* matrixed stereo with shift */
		//Assert( bytesShifted != 0 );
		int32_t		weightR = (1 << mixbits) - mixres;

		for ( j = 0, k = 0; j < numSamples; j++, k += 2 )
		{
			left  = src[0];
			right = src[1];
			src  += stride;

			/* stash the bottom bytes that are about to be shifted away */
			shiftUV[k + 0] = (uint16_t)(left & mask);
			shiftUV[k + 1] = (uint16_t)(right & mask);

			left  >>= shift;
			right >>= shift;

			u[j] = (mixres * left + weightR * right) >> mixbits;
			v[j] = left - right;
		}
	}
	else if ( bytesShifted == 0 )
	{
		/* de-interleaving without a shift */
		for ( j = 0; j < numSamples; j++ )
		{
			u[j] = src[0];
			v[j] = src[1];
			src += stride;
		}
	}
	else
	{
		/* de-interleaving with a shift */
		for ( j = 0, k = 0; j < numSamples; j++, k += 2 )
		{
			left  = src[0];
			right = src[1];
			src  += stride;

			shiftUV[k + 0] = (uint16_t)(left & mask);
			shiftUV[k + 1] = (uint16_t)(right & mask);

			u[j] = left >> shift;
			v[j] = right >> shift;
		}
	}
}
311 // 20/24-bit <-> 32-bit helper routines (not really matrixing but convenient to put here)
// fall back to little-endian byte offsets when compiled stand-alone
// (no effect in the full file, where the macros are already defined)
#ifndef LBYTE
	#define LBYTE 0
	#define MBYTE 1
	#define HBYTE 2
#endif

/*
	Copy one channel of 20-bit samples (left-justified in 3-byte fields) into
	a right-aligned, sign-extended 32-bit predictor buffer.

	FIX: the 24-bit container is assembled and left-shifted in uint32_t --
	the original `(val << 8)` left-shifted values as large as 0xFFFFFF into
	the sign bit of a signed int, which is undefined behavior.  The
	uint32_t -> int32_t cast relies on two's-complement wraparound
	(implementation-defined but universal); the arithmetic right shift then
	sign-extends the 20-bit payload exactly as before.
*/
void copy20ToPredictor( uint8_t * in, uint32_t stride, int32_t * out, int32_t numSamples )
{
	uint8_t *	ip = in;
	int32_t		j;

	for ( j = 0; j < numSamples; j++ )
	{
		uint32_t	raw;

		// 20-bit values are left-aligned in the 24-bit input buffer
		// but right-aligned in the 32-bit output buffer
		raw = ((uint32_t)ip[HBYTE] << 16) | ((uint32_t)ip[MBYTE] << 8) | (uint32_t)ip[LBYTE];
		out[j] = (int32_t)(raw << 8) >> 12;
		ip += stride * 3;
	}
}
// fall back to little-endian byte offsets when compiled stand-alone
// (no effect in the full file, where the macros are already defined)
#ifndef LBYTE
	#define LBYTE 0
	#define MBYTE 1
	#define HBYTE 2
#endif

/*
	Copy one channel of 24-bit samples (3 bytes each) into a sign-extended
	32-bit predictor buffer.

	FIX: the 24-bit container is assembled and left-shifted in uint32_t --
	the original `(val << 8)` left-shifted values as large as 0xFFFFFF into
	the sign bit of a signed int, which is undefined behavior.  The
	uint32_t -> int32_t cast relies on two's-complement wraparound
	(implementation-defined but universal); the arithmetic right shift by 8
	then sign-extends the 24-bit payload exactly as before.
*/
void copy24ToPredictor( uint8_t * in, uint32_t stride, int32_t * out, int32_t numSamples )
{
	uint8_t *	ip = in;
	int32_t		j;

	for ( j = 0; j < numSamples; j++ )
	{
		uint32_t	raw;

		raw = ((uint32_t)ip[HBYTE] << 16) | ((uint32_t)ip[MBYTE] << 8) | (uint32_t)ip[LBYTE];
		out[j] = (int32_t)(raw << 8) >> 8;
		ip += stride * 3;
	}
}