Clean up compiler warnings when compiling on 64-bit systems. These are mostly fixing...
[ffado.git] / libffado / src / libstreaming / amdtp / AmdtpTransmitStreamProcessor.cpp
blob90cda5142f6740900b5c579551858106e9c30ec3
1 /*
2 * Copyright (C) 2005-2008 by Pieter Palmers
4 * This file is part of FFADO
5 * FFADO = Free Firewire (pro-)audio drivers for linux
7 * FFADO is based upon FreeBoB.
9 * This program is free software: you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation, either version 2 of the License, or
12 * (at your option) version 3 of the License.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with this program. If not, see <http://www.gnu.org/licenses/>.
24 #include "config.h"
26 #include "AmdtpTransmitStreamProcessor.h"
27 #include "AmdtpPort.h"
28 #include "../StreamProcessorManager.h"
29 #include "devicemanager.h"
31 #include "libutil/Time.h"
32 #include "libutil/float_cast.h"
34 #include "libieee1394/ieee1394service.h"
35 #include "libieee1394/IsoHandlerManager.h"
36 #include "libieee1394/cycletimer.h"
38 #include "libutil/ByteSwap.h"
39 #include <assert.h>
40 #include <cstring>
// Branch-prediction hints for GCC-compatible compilers.
// The condition is normalised with `!!` so that any truthy value (pointers,
// integers other than 1) matches the expected value given to
// __builtin_expect, and so that pointer conditions are accepted in C++.
#define likely(x)   __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)

// Scale factor applied to float samples before they are truncated to an
// integer and masked to 24 bits: 2^23 - 1.
#define AMDTP_FLOAT_MULTIPLIER (1.0f * ((1<<23) - 1))
46 namespace Streaming
49 /* transmit */
50 AmdtpTransmitStreamProcessor::AmdtpTransmitStreamProcessor(FFADODevice &parent, int dimension)
51 : StreamProcessor(parent, ePT_Transmit)
52 , m_dimension( dimension )
53 , m_dbc( 0 )
54 #if AMDTP_ALLOW_PAYLOAD_IN_NODATA_XMIT
55 , m_send_nodata_payload ( AMDTP_SEND_PAYLOAD_IN_NODATA_XMIT_BY_DEFAULT )
56 #endif
57 , m_max_cycles_to_transmit_early ( AMDTP_MAX_CYCLES_TO_TRANSMIT_EARLY )
58 , m_transmit_transfer_delay ( AMDTP_TRANSMIT_TRANSFER_DELAY )
59 , m_min_cycles_before_presentation ( AMDTP_MIN_CYCLES_BEFORE_PRESENTATION )
60 , m_nb_audio_ports( 0 )
61 , m_nb_midi_ports( 0 )
64 enum StreamProcessor::eChildReturnValue
65 AmdtpTransmitStreamProcessor::generatePacketHeader (
66 unsigned char *data, unsigned int *length,
67 unsigned char *tag, unsigned char *sy,
68 uint32_t pkt_ctr )
70 __builtin_prefetch(data, 1, 0); // prefetch events for write, no temporal locality
71 struct iec61883_packet *packet = (struct iec61883_packet *)data;
72 /* Our node ID can change after a bus reset, so it is best to fetch
73 * our node ID for each packet. */
74 packet->sid = m_local_node_id;
76 packet->eoh0 = 0;
77 packet->dbs = m_dimension;
78 packet->fn = 0;
79 packet->qpc = 0;
80 packet->sph = 0;
81 packet->reserved = 0;
82 packet->dbc = m_dbc;
83 packet->eoh1 = 2;
84 packet->fmt = IEC61883_FMT_AMDTP;
86 *tag = IEC61883_TAG_WITH_CIP;
87 *sy = 0;
89 signed int fc;
90 uint64_t presentation_time;
91 unsigned int presentation_cycle;
92 int cycles_until_presentation;
94 uint64_t transmit_at_time;
95 unsigned int transmit_at_cycle;
96 int cycles_until_transmit;
98 debugOutputExtreme( DEBUG_LEVEL_ULTRA_VERBOSE,
99 "Try for cycle %d\n", (int) CYCLE_TIMER_GET_CYCLES(pkt_ctr) );
100 // check whether the packet buffer has packets for us to send.
101 // the base timestamp is the one of the next sample in the buffer
102 ffado_timestamp_t ts_head_tmp;
103 m_data_buffer->getBufferHeadTimestamp( &ts_head_tmp, &fc ); // thread safe
105 // the timestamp gives us the time at which we want the sample block
106 // to be output by the device
107 presentation_time = ( uint64_t ) ts_head_tmp;
109 // now we calculate the time when we have to transmit the sample block
110 transmit_at_time = substractTicks( presentation_time, m_transmit_transfer_delay );
112 // calculate the cycle this block should be presented in
113 // (this is just a virtual calculation since at that time it should
114 // already be in the device's buffer)
115 presentation_cycle = ( unsigned int ) ( TICKS_TO_CYCLES ( presentation_time ) );
117 // calculate the cycle this block should be transmitted in
118 transmit_at_cycle = ( unsigned int ) ( TICKS_TO_CYCLES ( transmit_at_time ) );
120 // we can check whether this cycle is within the 'window' we have
121 // to send this packet.
122 // first calculate the number of cycles left before presentation time
123 cycles_until_presentation = diffCycles ( presentation_cycle, CYCLE_TIMER_GET_CYCLES(pkt_ctr) );
125 // we can check whether this cycle is within the 'window' we have
126 // to send this packet.
127 // first calculate the number of cycles left before presentation time
128 cycles_until_transmit = diffCycles ( transmit_at_cycle, CYCLE_TIMER_GET_CYCLES(pkt_ctr) );
130 // two different options:
131 // 1) there are not enough frames for one packet
132 // => determine wether this is a problem, since we might still
133 // have some time to send it
134 // 2) there are enough packets
135 // => determine whether we have to send them in this packet
136 if ( fc < ( signed int ) m_syt_interval )
138 // not enough frames in the buffer,
140 // we can still postpone the queueing of the packets
141 // if we are far enough ahead of the presentation time
142 if ( cycles_until_presentation <= m_min_cycles_before_presentation )
144 debugOutput( DEBUG_LEVEL_NORMAL,
145 "Insufficient frames (P): N=%02d, CY=%04d, TC=%04u, CUT=%04d\n",
146 fc, (int)CYCLE_TIMER_GET_CYCLES(pkt_ctr),
147 transmit_at_cycle, cycles_until_transmit );
148 // we are too late
149 return eCRV_XRun;
151 else
153 #if DEBUG_EXTREME
154 unsigned int now_cycle = ( unsigned int ) ( TICKS_TO_CYCLES ( m_1394service.getCycleTimerTicks() ) );
156 debugOutputExtreme(DEBUG_LEVEL_VERBOSE,
157 "Insufficient frames (NP): N=%02d, CY=%04d, TC=%04u, CUT=%04d, NOW=%04d\n",
158 fc, (int)CYCLE_TIMER_GET_CYCLES(pkt_ctr),
159 transmit_at_cycle, cycles_until_transmit, now_cycle );
160 #endif
162 // there is still time left to send the packet
163 // we want the system to give this packet another go at a later time instant
164 return eCRV_Again; // note that the raw1394 again system doesn't work as expected
166 // we could wait here for a certain time before trying again. However, this
167 // is not going to work since we then block the iterator thread, hence also
168 // the receiving code, meaning that we are not processing received packets,
169 // and hence there is no progression in the number of frames available.
171 // for example:
172 // SleepRelativeUsec(125); // one cycle
173 // goto try_block_of_frames;
175 // or more advanced, calculate how many cycles we are ahead of 'now' and
176 // base the sleep on that.
178 // note that this requires that there is one thread for each IsoHandler,
179 // otherwise we're in the deadlock described above.
182 else
184 // there are enough frames, so check the time they are intended for
185 // all frames have a certain 'time window' in which they can be sent
186 // this corresponds to the range of the timestamp mechanism:
187 // we can send a packet 15 cycles in advance of the 'presentation time'
188 // in theory we can send the packet up till one cycle before the presentation time,
189 // however this is not very smart.
191 // There are 3 options:
192 // 1) the frame block is too early
193 // => send an empty packet
194 // 2) the frame block is within the window
195 // => send it
196 // 3) the frame block is too late
197 // => discard (and raise xrun?)
198 // get next block of frames and repeat
200 if(cycles_until_transmit < 0)
202 // we are too late
203 debugOutput(DEBUG_LEVEL_VERBOSE,
204 "Too late: CY=%04d, TC=%04u, CUT=%04d, TSP=%011"PRIu64" (%04u)\n",
205 (int)CYCLE_TIMER_GET_CYCLES(pkt_ctr),
206 transmit_at_cycle, cycles_until_transmit,
207 presentation_time, (unsigned int)TICKS_TO_CYCLES(presentation_time) );
208 //debugShowBackLogLines(200);
209 // however, if we can send this sufficiently before the presentation
210 // time, it could be harmless.
211 // NOTE: dangerous since the device has no way of reporting that it didn't get
212 // this packet on time.
213 if(cycles_until_presentation >= m_min_cycles_before_presentation)
215 // we are not that late and can still try to transmit the packet
216 m_dbc += fillDataPacketHeader(packet, length, presentation_time);
217 m_last_timestamp = presentation_time;
218 return (fc < (signed)(2*m_syt_interval) ? eCRV_Defer : eCRV_Packet);
220 else // definitely too late
222 return eCRV_XRun;
225 else if(cycles_until_transmit <= m_max_cycles_to_transmit_early)
227 // it's time send the packet
228 m_dbc += fillDataPacketHeader(packet, length, presentation_time);
229 m_last_timestamp = presentation_time;
231 // for timestamp tracing
232 debugOutputExtreme(DEBUG_LEVEL_VERY_VERBOSE,
233 "XMIT PKT: TSP= %011"PRIu64" (%04u) (%04u) (%04u)\n",
234 presentation_time,
235 (unsigned int)CYCLE_TIMER_GET_CYCLES(pkt_ctr),
236 presentation_cycle, transmit_at_cycle);
238 return (fc < (signed)(m_syt_interval) ? eCRV_Defer : eCRV_Packet);
240 else
242 debugOutputExtreme(DEBUG_LEVEL_VERY_VERBOSE,
243 "Too early: CY=%04u, TC=%04u, CUT=%04d, TST=%011"PRIu64" (%04u), TSP=%011"PRId64" (%04u)\n",
244 (int)CYCLE_TIMER_GET_CYCLES(pkt_ctr),
245 transmit_at_cycle, cycles_until_transmit,
246 transmit_at_time, (unsigned int)TICKS_TO_CYCLES(transmit_at_time),
247 presentation_time, (unsigned int)TICKS_TO_CYCLES(presentation_time));
248 #ifdef DEBUG
249 if ( cycles_until_transmit > m_max_cycles_to_transmit_early + 1 )
251 debugOutputExtreme(DEBUG_LEVEL_VERY_VERBOSE,
252 "Way too early: CY=%04u, TC=%04u, CUT=%04d, TST=%011"PRIu64" (%04u), TSP=%011"PRId64"(%04u)\n",
253 (int)CYCLE_TIMER_GET_CYCLES(pkt_ctr),
254 transmit_at_cycle, cycles_until_transmit,
255 transmit_at_time, (unsigned int)TICKS_TO_CYCLES(transmit_at_time),
256 presentation_time, (unsigned int)TICKS_TO_CYCLES(presentation_time));
258 #endif
259 // we are too early, send only an empty packet
260 return eCRV_EmptyPacket;
263 return eCRV_Invalid;
266 enum StreamProcessor::eChildReturnValue
267 AmdtpTransmitStreamProcessor::generatePacketData (
268 unsigned char *data, unsigned int *length )
270 if (m_data_buffer->readFrames(m_syt_interval, (char *)(data + 8)))
272 debugOutputExtreme(DEBUG_LEVEL_VERBOSE,
273 "XMIT DATA: TSP= %011"PRIu64" (%04u)\n",
274 m_last_timestamp,
275 (unsigned int)TICKS_TO_CYCLES(m_last_timestamp));
276 #if 0
277 // debug code to output the packet content
278 char tmpbuff[8192];
279 int cnt=0;
280 quadlet_t *tmp = (quadlet_t *)((char *)(data + 8));
282 for(int i=0; i<m_syt_interval; i++) {
283 cnt += snprintf(tmpbuff + cnt, 8192-cnt, "[%02d] ", i);
284 for(int j=0; j<m_dimension; j++) {
285 cnt += snprintf(tmpbuff + cnt, 8192-cnt, "%08X ", *tmp);
286 tmp++;
288 cnt += snprintf(tmpbuff + cnt, 8192-cnt, "\n");
290 debugOutput(DEBUG_LEVEL_VERBOSE, "\n%s\n", tmpbuff);
291 #endif
292 return eCRV_OK;
294 else return eCRV_XRun;
297 enum StreamProcessor::eChildReturnValue
298 AmdtpTransmitStreamProcessor::generateSilentPacketHeader (
299 unsigned char *data, unsigned int *length,
300 unsigned char *tag, unsigned char *sy,
301 uint32_t pkt_ctr )
303 struct iec61883_packet *packet = ( struct iec61883_packet * ) data;
304 debugOutputExtreme(DEBUG_LEVEL_ULTRA_VERBOSE,
305 "XMIT SILENT (cy %04d): TSP=%011"PRIu64" (%04u)\n",
306 (int)CYCLE_TIMER_GET_CYCLES(pkt_ctr), m_last_timestamp,
307 (unsigned int)TICKS_TO_CYCLES(m_last_timestamp));
309 packet->sid = m_local_node_id;
311 packet->eoh0 = 0;
312 packet->dbs = m_dimension;
313 packet->fn = 0;
314 packet->qpc = 0;
315 packet->sph = 0;
316 packet->reserved = 0;
317 packet->dbc = m_dbc;
318 packet->eoh1 = 2;
319 packet->fmt = IEC61883_FMT_AMDTP;
321 *tag = IEC61883_TAG_WITH_CIP;
322 *sy = 0;
324 m_dbc += fillNoDataPacketHeader(packet, length);
325 return eCRV_Packet;
328 enum StreamProcessor::eChildReturnValue
329 AmdtpTransmitStreamProcessor::generateSilentPacketData (
330 unsigned char *data, unsigned int *length )
332 return eCRV_OK; // no need to do anything
335 enum StreamProcessor::eChildReturnValue
336 AmdtpTransmitStreamProcessor::generateEmptyPacketHeader (
337 unsigned char *data, unsigned int *length,
338 unsigned char *tag, unsigned char *sy,
339 uint32_t pkt_ctr )
341 struct iec61883_packet *packet = ( struct iec61883_packet * ) data;
342 debugOutputExtreme(DEBUG_LEVEL_ULTRA_VERBOSE,
343 "XMIT EMPTY (cy %04d): TSP=%011"PRIu64" (%04u)\n",
344 (int)CYCLE_TIMER_GET_CYCLES(pkt_ctr), m_last_timestamp,
345 (unsigned int)TICKS_TO_CYCLES(m_last_timestamp) );
346 packet->sid = m_local_node_id;
348 packet->eoh0 = 0;
349 packet->dbs = m_dimension;
350 packet->fn = 0;
351 packet->qpc = 0;
352 packet->sph = 0;
353 packet->reserved = 0;
354 packet->dbc = m_dbc;
355 packet->eoh1 = 2;
356 packet->fmt = IEC61883_FMT_AMDTP;
358 *tag = IEC61883_TAG_WITH_CIP;
359 *sy = 0;
361 m_dbc += fillNoDataPacketHeader(packet, length);
362 return eCRV_OK;
365 enum StreamProcessor::eChildReturnValue
366 AmdtpTransmitStreamProcessor::generateEmptyPacketData (
367 unsigned char *data, unsigned int *length )
369 return eCRV_OK; // no need to do anything
372 unsigned int AmdtpTransmitStreamProcessor::fillDataPacketHeader (
373 struct iec61883_packet *packet, unsigned int* length,
374 uint32_t ts )
377 packet->fdf = m_fdf;
379 // convert the timestamp to SYT format
380 uint16_t timestamp_SYT = TICKS_TO_SYT ( ts );
381 packet->syt = CondSwapToBus16 ( timestamp_SYT );
383 // FIXME: use a precomputed value here
384 *length = m_syt_interval*sizeof ( quadlet_t ) *m_dimension + 8;
386 return m_syt_interval;
389 unsigned int AmdtpTransmitStreamProcessor::fillNoDataPacketHeader (
390 struct iec61883_packet *packet, unsigned int* length )
392 // no-data packets have syt=0xFFFF
393 // and (can) have the usual amount of events as dummy data
394 // DBC is not increased
395 packet->fdf = IEC61883_FDF_NODATA;
396 packet->syt = 0xffff;
398 #if AMDTP_ALLOW_PAYLOAD_IN_NODATA_XMIT
399 if ( m_send_nodata_payload )
400 { // no-data packets with payload (NOTE: DICE-II doesn't like that)
401 *length = 2*sizeof ( quadlet_t ) + m_syt_interval * m_dimension * sizeof ( quadlet_t );
402 return m_syt_interval;
403 } else { // no-data packets without payload
404 *length = 2*sizeof ( quadlet_t );
405 return 0;
407 #else
408 // no-data packets without payload
409 *length = 2*sizeof ( quadlet_t );
410 return 0;
411 #endif
414 unsigned int
415 AmdtpTransmitStreamProcessor::getSytInterval() {
416 switch (m_StreamProcessorManager.getNominalRate()) {
417 case 32000:
418 case 44100:
419 case 48000:
420 return 8;
421 case 88200:
422 case 96000:
423 return 16;
424 case 176400:
425 case 192000:
426 return 32;
427 default:
428 debugError("Unsupported rate: %d\n", m_StreamProcessorManager.getNominalRate());
429 return 0;
433 unsigned int
434 AmdtpTransmitStreamProcessor::getFDF() {
435 switch (m_StreamProcessorManager.getNominalRate()) {
436 case 32000: return IEC61883_FDF_SFC_32KHZ;
437 case 44100: return IEC61883_FDF_SFC_44K1HZ;
438 case 48000: return IEC61883_FDF_SFC_48KHZ;
439 case 88200: return IEC61883_FDF_SFC_88K2HZ;
440 case 96000: return IEC61883_FDF_SFC_96KHZ;
441 case 176400: return IEC61883_FDF_SFC_176K4HZ;
442 case 192000: return IEC61883_FDF_SFC_192KHZ;
443 default:
444 debugError("Unsupported rate: %d\n", m_StreamProcessorManager.getNominalRate());
445 return 0;
449 bool AmdtpTransmitStreamProcessor::prepareChild()
451 debugOutput ( DEBUG_LEVEL_VERBOSE, "Preparing (%p)...\n", this );
452 m_syt_interval = getSytInterval();
453 m_fdf = getFDF();
455 debugOutput ( DEBUG_LEVEL_VERBOSE, " SYT interval / FDF : %d / %d\n", m_syt_interval, m_fdf );
456 #if AMDTP_ALLOW_PAYLOAD_IN_NODATA_XMIT
457 debugOutput ( DEBUG_LEVEL_VERBOSE, " Send payload in No-Data packets: %s \n", m_send_nodata_payload?"Yes":"No" );
458 #endif
459 debugOutput ( DEBUG_LEVEL_VERBOSE, " Max early transmit cycles : %d\n", m_max_cycles_to_transmit_early );
460 debugOutput ( DEBUG_LEVEL_VERBOSE, " Transfer delay : %d\n", m_transmit_transfer_delay );
461 debugOutput ( DEBUG_LEVEL_VERBOSE, " Min cycles before presentation : %d\n", m_min_cycles_before_presentation );
463 iec61883_cip_init (
464 &m_cip_status,
465 IEC61883_FMT_AMDTP,
466 m_fdf,
467 m_StreamProcessorManager.getNominalRate(),
468 m_dimension,
469 m_syt_interval );
471 if (!initPortCache()) {
472 debugError("Could not init port cache\n");
473 return false;
476 return true;
480 * compose the event streams for the packets from the port buffers
482 bool AmdtpTransmitStreamProcessor::processWriteBlock ( char *data,
483 unsigned int nevents, unsigned int offset )
485 // update the variable parts of the cache
486 updatePortCache();
488 // encode audio data
489 switch(m_StreamProcessorManager.getAudioDataType()) {
490 case StreamProcessorManager::eADT_Int24:
491 encodeAudioPortsInt24((quadlet_t *)data, offset, nevents);
492 break;
493 case StreamProcessorManager::eADT_Float:
494 encodeAudioPortsFloat((quadlet_t *)data, offset, nevents);
495 break;
498 // do midi ports
499 encodeMidiPorts((quadlet_t *)data, offset, nevents);
500 return true;
503 bool
504 AmdtpTransmitStreamProcessor::transmitSilenceBlock(
505 char *data, unsigned int nevents, unsigned int offset)
507 // no need to update the port cache when transmitting silence since
508 // no dynamic values are used to do so.
509 encodeAudioPortsSilence((quadlet_t *)data, offset, nevents);
510 encodeMidiPortsSilence((quadlet_t *)data, offset, nevents);
511 return true;
515 * @brief encodes all audio ports in the cache to events (silent data)
516 * @param data
517 * @param offset
518 * @param nevents
520 void
521 AmdtpTransmitStreamProcessor::encodeAudioPortsSilence(quadlet_t *data,
522 unsigned int offset,
523 unsigned int nevents)
525 unsigned int j;
526 quadlet_t *target_event;
527 int i;
529 for (i = 0; i < m_nb_audio_ports; i++) {
530 target_event = (quadlet_t *)(data + i);
532 for (j = 0;j < nevents; j += 1)
534 *target_event = CONDSWAPTOBUS32_CONST(0x40000000);
535 target_event += m_dimension;
540 #ifdef __SSE2__
541 #include <emmintrin.h>
543 // There's no obvious reason to warn about this anymore - jwoithe.
544 // #warning SSE2 build
547 * @brief mux all audio ports to events
548 * @param data
549 * @param offset
550 * @param nevents
552 void
553 AmdtpTransmitStreamProcessor::encodeAudioPortsFloat(quadlet_t *data,
554 unsigned int offset,
555 unsigned int nevents)
557 unsigned int j;
558 quadlet_t *target_event;
559 int i;
561 float * client_buffers[4];
562 float tmp_values[4] __attribute__ ((aligned (16)));
563 uint32_t tmp_values_int[4] __attribute__ ((aligned (16)));
565 // prepare the scratch buffer
566 assert(m_scratch_buffer_size_bytes > nevents * 4);
567 memset(m_scratch_buffer, 0, nevents * 4);
569 const __m128i label = _mm_set_epi32 (0x40000000, 0x40000000, 0x40000000, 0x40000000);
570 const __m128i mask = _mm_set_epi32 (0x00FFFFFF, 0x00FFFFFF, 0x00FFFFFF, 0x00FFFFFF);
571 const __m128 mult = _mm_set_ps(AMDTP_FLOAT_MULTIPLIER, AMDTP_FLOAT_MULTIPLIER, AMDTP_FLOAT_MULTIPLIER, AMDTP_FLOAT_MULTIPLIER);
573 #if AMDTP_CLIP_FLOATS
574 const __m128 v_max = _mm_set_ps(1.0, 1.0, 1.0, 1.0);
575 const __m128 v_min = _mm_set_ps(-1.0, -1.0, -1.0, -1.0);
576 #endif
578 // this assumes that audio ports are sorted by position,
579 // and that there are no gaps
580 for (i = 0; i < ((int)m_nb_audio_ports)-4; i += 4) {
581 struct _MBLA_port_cache *p;
583 // get the port buffers
584 for (j=0; j<4; j++) {
585 p = &(m_audio_ports.at(i+j));
586 if(likely(p->buffer && p->enabled)) {
587 client_buffers[j] = (float *) p->buffer;
588 client_buffers[j] += offset;
589 } else {
590 // if a port is disabled or has no valid
591 // buffer, use the scratch buffer (all zero's)
592 client_buffers[j] = (float *) m_scratch_buffer;
596 // the base event for this position
597 target_event = (quadlet_t *)(data + i);
598 // process the events
599 for (j=0;j < nevents; j += 1)
601 // read the values
602 tmp_values[0] = *(client_buffers[0]);
603 tmp_values[1] = *(client_buffers[1]);
604 tmp_values[2] = *(client_buffers[2]);
605 tmp_values[3] = *(client_buffers[3]);
607 // now do the SSE based conversion/labeling
608 __m128 v_float = *((__m128*)tmp_values);
609 __m128i *target = (__m128i*)target_event;
610 __m128i v_int;
612 // clip
613 #if AMDTP_CLIP_FLOATS
614 // do SSE clipping
615 v_float = _mm_max_ps(v_float, v_min);
616 v_float = _mm_min_ps(v_float, v_max);
617 #endif
619 // multiply
620 v_float = _mm_mul_ps(v_float, mult);
621 // convert to signed integer
622 v_int = _mm_cvttps_epi32( v_float );
623 // mask
624 v_int = _mm_and_si128( v_int, mask );
625 // label it
626 v_int = _mm_or_si128( v_int, label );
628 // do endian conversion (SSE is always little endian)
629 // do first swap
630 v_int = _mm_or_si128( _mm_slli_epi16( v_int, 8 ), _mm_srli_epi16( v_int, 8 ) );
631 // do second swap
632 v_int = _mm_or_si128( _mm_slli_epi32( v_int, 16 ), _mm_srli_epi32( v_int, 16 ) );
633 // store the packed int
634 // (target misalignment is assumed since we don't know the m_dimension)
635 _mm_storeu_si128 (target, v_int);
637 // increment the buffer pointers
638 client_buffers[0]++;
639 client_buffers[1]++;
640 client_buffers[2]++;
641 client_buffers[3]++;
643 // go to next target event position
644 target_event += m_dimension;
648 // do remaining ports
649 // NOTE: these can be time-SSE'd
650 for (; i < (int)m_nb_audio_ports; i++) {
651 struct _MBLA_port_cache &p = m_audio_ports.at(i);
652 target_event = (quadlet_t *)(data + i);
653 #ifdef DEBUG
654 assert(nevents + offset <= p.buffer_size );
655 #endif
657 if(likely(p.buffer && p.enabled)) {
658 float *buffer = (float *)(p.buffer);
659 buffer += offset;
661 for (j = 0;j < nevents; j += 4)
663 // read the values
664 tmp_values[0] = *buffer;
665 buffer++;
666 tmp_values[1] = *buffer;
667 buffer++;
668 tmp_values[2] = *buffer;
669 buffer++;
670 tmp_values[3] = *buffer;
671 buffer++;
673 // now do the SSE based conversion/labeling
674 __m128 v_float = *((__m128*)tmp_values);
675 __m128i v_int;
677 #if AMDTP_CLIP_FLOATS
678 // do SSE clipping
679 v_float = _mm_max_ps(v_float, v_min);
680 v_float = _mm_min_ps(v_float, v_max);
681 #endif
682 // multiply
683 v_float = _mm_mul_ps(v_float, mult);
684 // convert to signed integer
685 v_int = _mm_cvttps_epi32( v_float );
686 // mask
687 v_int = _mm_and_si128( v_int, mask );
688 // label it
689 v_int = _mm_or_si128( v_int, label );
691 // do endian conversion (SSE is always little endian)
692 // do first swap
693 v_int = _mm_or_si128( _mm_slli_epi16( v_int, 8 ), _mm_srli_epi16( v_int, 8 ) );
694 // do second swap
695 v_int = _mm_or_si128( _mm_slli_epi32( v_int, 16 ), _mm_srli_epi32( v_int, 16 ) );
697 // store the packed int
698 _mm_store_si128 ((__m128i *)(&tmp_values_int), v_int);
700 // increment the buffer pointers
701 *target_event = tmp_values_int[0];
702 target_event += m_dimension;
703 *target_event = tmp_values_int[1];
704 target_event += m_dimension;
705 *target_event = tmp_values_int[2];
706 target_event += m_dimension;
707 *target_event = tmp_values_int[3];
708 target_event += m_dimension;
711 // do the remainder of the events
712 for(;j < nevents; j += 1) {
713 float *in = (float *)buffer;
714 #if AMDTP_CLIP_FLOATS
715 // clip directly to the value of a maxed event
716 if(unlikely(*in > 1.0)) {
717 *target_event = CONDSWAPTOBUS32_CONST(0x407FFFFF);
718 } else if(unlikely(*in < -1.0)) {
719 *target_event = CONDSWAPTOBUS32_CONST(0x40800001);
720 } else {
721 float v = (*in) * AMDTP_FLOAT_MULTIPLIER;
722 unsigned int tmp = ((int) v);
723 tmp = ( tmp & 0x00FFFFFF ) | 0x40000000;
724 *target_event = CondSwapToBus32((quadlet_t)tmp);
726 #else
727 float v = (*in) * AMDTP_FLOAT_MULTIPLIER;
728 unsigned int tmp = ((int) v);
729 tmp = ( tmp & 0x00FFFFFF ) | 0x40000000;
730 *target_event = CondSwapToBus32((quadlet_t)tmp);
731 #endif
732 buffer++;
733 target_event += m_dimension;
736 } else {
737 for (j = 0;j < nevents; j += 1)
739 // hardcoded byte swapped
740 *target_event = 0x00000040;
741 target_event += m_dimension;
749 * @brief mux all audio ports to events
750 * @param data
751 * @param offset
752 * @param nevents
754 void
755 AmdtpTransmitStreamProcessor::encodeAudioPortsInt24(quadlet_t *data,
756 unsigned int offset,
757 unsigned int nevents)
759 unsigned int j;
760 quadlet_t *target_event;
761 int i;
763 uint32_t *client_buffers[4];
764 uint32_t tmp_values[4] __attribute__ ((aligned (16)));
766 // prepare the scratch buffer
767 assert(m_scratch_buffer_size_bytes > nevents * 4);
768 memset(m_scratch_buffer, 0, nevents * 4);
770 const __m128i label = _mm_set_epi32 (0x40000000, 0x40000000, 0x40000000, 0x40000000);
771 const __m128i mask = _mm_set_epi32 (0x00FFFFFF, 0x00FFFFFF, 0x00FFFFFF, 0x00FFFFFF);
773 // this assumes that audio ports are sorted by position,
774 // and that there are no gaps
775 for (i = 0; i < ((int)m_nb_audio_ports)-4; i += 4) {
776 struct _MBLA_port_cache *p;
778 // get the port buffers
779 for (j=0; j<4; j++) {
780 p = &(m_audio_ports.at(i+j));
781 if(likely(p->buffer && p->enabled)) {
782 client_buffers[j] = (uint32_t *) p->buffer;
783 client_buffers[j] += offset;
784 } else {
785 // if a port is disabled or has no valid
786 // buffer, use the scratch buffer (all zero's)
787 client_buffers[j] = (uint32_t *) m_scratch_buffer;
791 // the base event for this position
792 target_event = (quadlet_t *)(data + i);
794 // process the events
795 for (j=0;j < nevents; j += 1)
797 // read the values
798 tmp_values[0] = *(client_buffers[0]);
799 tmp_values[1] = *(client_buffers[1]);
800 tmp_values[2] = *(client_buffers[2]);
801 tmp_values[3] = *(client_buffers[3]);
803 // now do the SSE based conversion/labeling
804 __m128i *target = (__m128i*)target_event;
805 __m128i v_int = *((__m128i*)tmp_values);;
807 // mask
808 v_int = _mm_and_si128( v_int, mask );
809 // label it
810 v_int = _mm_or_si128( v_int, label );
812 // do endian conversion (SSE is always little endian)
813 // do first swap
814 v_int = _mm_or_si128( _mm_slli_epi16( v_int, 8 ), _mm_srli_epi16( v_int, 8 ) );
815 // do second swap
816 v_int = _mm_or_si128( _mm_slli_epi32( v_int, 16 ), _mm_srli_epi32( v_int, 16 ) );
818 // store the packed int
819 // (target misalignment is assumed since we don't know the m_dimension)
820 _mm_storeu_si128 (target, v_int);
822 // increment the buffer pointers
823 client_buffers[0]++;
824 client_buffers[1]++;
825 client_buffers[2]++;
826 client_buffers[3]++;
828 // go to next target event position
829 target_event += m_dimension;
833 // do remaining ports
834 // NOTE: these can be time-SSE'd
835 for (; i < ((int)m_nb_audio_ports); i++) {
836 struct _MBLA_port_cache &p = m_audio_ports.at(i);
837 target_event = (quadlet_t *)(data + i);
838 #ifdef DEBUG
839 assert(nevents + offset <= p.buffer_size );
840 #endif
842 if(likely(p.buffer && p.enabled)) {
843 uint32_t *buffer = (uint32_t *)(p.buffer);
844 buffer += offset;
846 for (j = 0;j < nevents; j += 4)
848 // read the values
849 tmp_values[0] = *buffer;
850 buffer++;
851 tmp_values[1] = *buffer;
852 buffer++;
853 tmp_values[2] = *buffer;
854 buffer++;
855 tmp_values[3] = *buffer;
856 buffer++;
858 // now do the SSE based conversion/labeling
859 __m128i v_int = *((__m128i*)tmp_values);;
861 // mask
862 v_int = _mm_and_si128( v_int, mask );
863 // label it
864 v_int = _mm_or_si128( v_int, label );
866 // do endian conversion (SSE is always little endian)
867 // do first swap
868 v_int = _mm_or_si128( _mm_slli_epi16( v_int, 8 ), _mm_srli_epi16( v_int, 8 ) );
869 // do second swap
870 v_int = _mm_or_si128( _mm_slli_epi32( v_int, 16 ), _mm_srli_epi32( v_int, 16 ) );
872 // store the packed int
873 _mm_store_si128 ((__m128i *)(&tmp_values), v_int);
875 // increment the buffer pointers
876 *target_event = tmp_values[0];
877 target_event += m_dimension;
878 *target_event = tmp_values[1];
879 target_event += m_dimension;
880 *target_event = tmp_values[2];
881 target_event += m_dimension;
882 *target_event = tmp_values[3];
883 target_event += m_dimension;
886 // do the remainder of the events
887 for(;j < nevents; j += 1) {
888 uint32_t in = (uint32_t)(*buffer);
889 *target_event = CondSwapToBus32((quadlet_t)((in & 0x00FFFFFF) | 0x40000000));
890 buffer++;
891 target_event += m_dimension;
894 } else {
895 for (j = 0;j < nevents; j += 1)
897 // hardcoded byte swapped
898 *target_event = 0x00000040;
899 target_event += m_dimension;
905 #else
908 * @brief mux all audio ports to events
909 * @param data
910 * @param offset
911 * @param nevents
913 void
914 AmdtpTransmitStreamProcessor::encodeAudioPortsInt24(quadlet_t *data,
915 unsigned int offset,
916 unsigned int nevents)
918 unsigned int j;
919 quadlet_t *target_event;
920 int i;
922 for (i = 0; i < m_nb_audio_ports; i++) {
923 struct _MBLA_port_cache &p = m_audio_ports.at(i);
924 target_event = (quadlet_t *)(data + i);
925 #ifdef DEBUG
926 assert(nevents + offset <= p.buffer_size );
927 #endif
929 if(likely(p.buffer && p.enabled)) {
930 quadlet_t *buffer = (quadlet_t *)(p.buffer);
931 buffer += offset;
933 for (j = 0;j < nevents; j += 1)
935 uint32_t in = (uint32_t)(*buffer);
936 *target_event = CondSwapToBus32((quadlet_t)((in & 0x00FFFFFF) | 0x40000000));
937 buffer++;
938 target_event += m_dimension;
940 } else {
941 for (j = 0;j < nevents; j += 1)
943 *target_event = CONDSWAPTOBUS32_CONST(0x40000000);
944 target_event += m_dimension;
951 * @brief mux all audio ports to events
952 * @param data
953 * @param offset
954 * @param nevents
956 void
957 AmdtpTransmitStreamProcessor::encodeAudioPortsFloat(quadlet_t *data,
958 unsigned int offset,
959 unsigned int nevents)
961 unsigned int j;
962 quadlet_t *target_event;
963 int i;
965 for (i = 0; i < m_nb_audio_ports; i++) {
966 struct _MBLA_port_cache &p = m_audio_ports.at(i);
967 target_event = (quadlet_t *)(data + i);
968 #ifdef DEBUG
969 assert(nevents + offset <= p.buffer_size );
970 #endif
972 if(likely(p.buffer && p.enabled)) {
973 quadlet_t *buffer = (quadlet_t *)(p.buffer);
974 buffer += offset;
976 for (j = 0;j < nevents; j += 1)
978 float *in = (float *)buffer;
979 #if AMDTP_CLIP_FLOATS
980 // clip directly to the value of a maxed event
981 if(unlikely(*in > 1.0)) {
982 *target_event = CONDSWAPTOBUS32_CONST(0x407FFFFF);
983 } else if(unlikely(*in < -1.0)) {
984 *target_event = CONDSWAPTOBUS32_CONST(0x40800001);
985 } else {
986 float v = (*in) * AMDTP_FLOAT_MULTIPLIER;
987 unsigned int tmp = ((int) v);
988 tmp = ( tmp & 0x00FFFFFF ) | 0x40000000;
989 *target_event = CondSwapToBus32((quadlet_t)tmp);
991 #else
992 float v = (*in) * AMDTP_FLOAT_MULTIPLIER;
993 unsigned int tmp = ((int) v);
994 tmp = ( tmp & 0x00FFFFFF ) | 0x40000000;
995 *target_event = CondSwapToBus32((quadlet_t)tmp);
996 #endif
997 buffer++;
998 target_event += m_dimension;
1000 } else {
1001 for (j = 0;j < nevents; j += 1)
1003 *target_event = CONDSWAPTOBUS32_CONST(0x40000000);
1004 target_event += m_dimension;
1009 #endif
1012 * @brief encodes all midi ports in the cache to events (silence)
1013 * @param data
1014 * @param offset
1015 * @param nevents
1017 void
1018 AmdtpTransmitStreamProcessor::encodeMidiPortsSilence(quadlet_t *data,
1019 unsigned int offset,
1020 unsigned int nevents)
1022 quadlet_t *target_event;
1023 int i;
1024 unsigned int j;
1026 for (i = 0; i < m_nb_midi_ports; i++) {
1027 struct _MIDI_port_cache &p = m_midi_ports.at(i);
1029 for (j = p.location;j < nevents; j += 8) {
1030 target_event = (quadlet_t *) (data + ((j * m_dimension) + p.position));
1031 *target_event = CondSwapToBus32(IEC61883_AM824_SET_LABEL(0, IEC61883_AM824_LABEL_MIDI_NO_DATA));
1037 * @brief encodes all midi ports in the cache to events
1038 * @param data
1039 * @param offset
1040 * @param nevents
1042 void
1043 AmdtpTransmitStreamProcessor::encodeMidiPorts(quadlet_t *data,
1044 unsigned int offset,
1045 unsigned int nevents)
1047 quadlet_t *target_event;
1048 int i;
1049 unsigned int j;
1051 for (i = 0; i < m_nb_midi_ports; i++) {
1052 struct _MIDI_port_cache &p = m_midi_ports.at(i);
1053 if (p.buffer && p.enabled) {
1054 uint32_t *buffer = (quadlet_t *)(p.buffer);
1055 buffer += offset;
1057 for (j = p.location;j < nevents; j += 8) {
1058 target_event = (quadlet_t *) (data + ((j * m_dimension) + p.position));
1060 if ( *buffer & 0xFF000000 ) // we can send a byte
1062 quadlet_t tmpval;
1063 tmpval = ((*buffer)<<16) & 0x00FF0000;
1064 tmpval = IEC61883_AM824_SET_LABEL(tmpval, IEC61883_AM824_LABEL_MIDI_1X);
1065 *target_event = CondSwapToBus32(tmpval);
1067 debugOutputExtreme( DEBUG_LEVEL_VERBOSE, "MIDI port %s, pos=%u, loc=%u, nevents=%u, dim=%d\n",
1068 p.port->getName().c_str(), p.position, p.location, nevents, m_dimension );
1069 debugOutputExtreme( DEBUG_LEVEL_VERBOSE, "base=%p, target=%p, value=%08X\n",
1070 data, target_event, tmpval );
1071 } else {
1072 // can't send a byte, either because there is no byte,
1073 // or because this would exceed the maximum rate
1074 // FIXME: this can be ifdef optimized since it's a constant
1075 *target_event = CondSwapToBus32(IEC61883_AM824_SET_LABEL(0, IEC61883_AM824_LABEL_MIDI_NO_DATA));
1077 buffer+=8;
1079 } else {
1080 for (j = p.location;j < nevents; j += 8) {
1081 target_event = (quadlet_t *)(data + ((j * m_dimension) + p.position));
1082 __builtin_prefetch(target_event, 1, 0); // prefetch events for write, no temporal locality
1083 *target_event = CondSwapToBus32(IEC61883_AM824_SET_LABEL(0, IEC61883_AM824_LABEL_MIDI_NO_DATA));
1089 bool
1090 AmdtpTransmitStreamProcessor::initPortCache() {
1091 // make use of the fact that audio ports are the first ports in
1092 // the cluster as per AMDTP. so we can sort the ports by position
1093 // and have very efficient lookups:
1094 // m_float_ports.at(i).buffer -> audio stream i buffer
1095 // for midi ports we simply cache all port info since they are (usually) not
1096 // that numerous
1097 m_nb_audio_ports = 0;
1098 m_audio_ports.clear();
1100 m_nb_midi_ports = 0;
1101 m_midi_ports.clear();
1103 for(PortVectorIterator it = m_Ports.begin();
1104 it != m_Ports.end();
1105 ++it )
1107 AmdtpPortInfo *pinfo=dynamic_cast<AmdtpPortInfo *>(*it);
1108 assert(pinfo); // this should not fail!!
1110 switch( pinfo->getFormat() )
1112 case AmdtpPortInfo::E_MBLA:
1113 m_nb_audio_ports++;
1114 break;
1115 case AmdtpPortInfo::E_SPDIF: // still unimplemented
1116 break;
1117 case AmdtpPortInfo::E_Midi:
1118 m_nb_midi_ports++;
1119 break;
1120 default: // ignore
1121 break;
1125 int idx;
1126 for (idx = 0; idx < m_nb_audio_ports; idx++) {
1127 for(PortVectorIterator it = m_Ports.begin();
1128 it != m_Ports.end();
1129 ++it )
1131 AmdtpPortInfo *pinfo=dynamic_cast<AmdtpPortInfo *>(*it);
1132 debugOutput(DEBUG_LEVEL_VERY_VERBOSE,
1133 "idx %u: looking at port %s at position %u\n",
1134 idx, (*it)->getName().c_str(), pinfo->getPosition());
1135 if(pinfo->getPosition() == (unsigned int)idx) {
1136 struct _MBLA_port_cache p;
1137 p.port = dynamic_cast<AmdtpAudioPort *>(*it);
1138 if(p.port == NULL) {
1139 debugError("Port is not an AmdtpAudioPort!\n");
1140 return false;
1142 p.buffer = NULL; // to be filled by updatePortCache
1143 #ifdef DEBUG
1144 p.buffer_size = (*it)->getBufferSize();
1145 #endif
1147 m_audio_ports.push_back(p);
1148 debugOutput(DEBUG_LEVEL_VERBOSE,
1149 "Cached port %s at position %u\n",
1150 p.port->getName().c_str(), idx);
1151 goto next_index;
1154 debugError("No MBLA port found for position %d\n", idx);
1155 return false;
1156 next_index:
1157 continue;
1160 for(PortVectorIterator it = m_Ports.begin();
1161 it != m_Ports.end();
1162 ++it )
1164 AmdtpPortInfo *pinfo=dynamic_cast<AmdtpPortInfo *>(*it);
1165 debugOutput(DEBUG_LEVEL_VERY_VERBOSE,
1166 "idx %u: looking at port %s at position %u, location %u\n",
1167 idx, (*it)->getName().c_str(), pinfo->getPosition(), pinfo->getLocation());
1168 if ((*it)->getPortType() == Port::E_Midi) {
1169 struct _MIDI_port_cache p;
1170 p.port = dynamic_cast<AmdtpMidiPort *>(*it);
1171 if(p.port == NULL) {
1172 debugError("Port is not an AmdtpMidiPort!\n");
1173 return false;
1175 p.position = pinfo->getPosition();
1176 p.location = pinfo->getLocation();
1177 p.buffer = NULL; // to be filled by updatePortCache
1178 #ifdef DEBUG
1179 p.buffer_size = (*it)->getBufferSize();
1180 #endif
1182 m_midi_ports.push_back(p);
1183 debugOutput(DEBUG_LEVEL_VERBOSE,
1184 "Cached port %s at position %u, location %u\n",
1185 p.port->getName().c_str(), p.position, p.location);
1189 return true;
1192 //FIXME: DRY. Needs to be refactored with AmdtpReceiveStreamProcessor
1193 void
1194 AmdtpTransmitStreamProcessor::updatePortCache() {
1195 int idx;
1196 for (idx = 0; idx < m_nb_audio_ports; idx++) {
1197 struct _MBLA_port_cache& p = m_audio_ports.at(idx);
1198 AmdtpAudioPort *port = p.port;
1199 p.buffer = port->getBufferAddress();
1200 p.enabled = !port->isDisabled();
1201 #ifdef DEBUG
1202 p.buffer_size = port->getBufferSize();
1203 #endif
1205 for (idx = 0; idx < m_nb_midi_ports; idx++) {
1206 struct _MIDI_port_cache& p = m_midi_ports.at(idx);
1207 AmdtpMidiPort *port = p.port;
1208 p.buffer = port->getBufferAddress();
1209 p.enabled = !port->isDisabled();
1210 #ifdef DEBUG
1211 p.buffer_size = port->getBufferSize();
1212 #endif
1216 } // end of namespace Streaming