2 * Copyright (C) 2005-2008 by Pieter Palmers
4 * This file is part of FFADO
5 * FFADO = Free Firewire (pro-)audio drivers for linux
7 * FFADO is based upon FreeBoB.
9 * This program is free software: you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation, either version 2 of the License, or
12 * (at your option) version 3 of the License.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with this program. If not, see <http://www.gnu.org/licenses/>.
26 #include "AmdtpTransmitStreamProcessor.h"
27 #include "AmdtpPort.h"
28 #include "../StreamProcessorManager.h"
29 #include "devicemanager.h"
31 #include "libutil/Time.h"
32 #include "libutil/float_cast.h"
34 #include "libieee1394/ieee1394service.h"
35 #include "libieee1394/IsoHandlerManager.h"
36 #include "libieee1394/cycletimer.h"
38 #include "libutil/ByteSwap.h"
42 #define likely(x) __builtin_expect((x),1)
43 #define unlikely(x) __builtin_expect((x),0)
45 #define AMDTP_FLOAT_MULTIPLIER (1.0f * ((1<<23) - 1))
// Constructor. Registers this object with the StreamProcessor base class as
// a transmit stream (ePT_Transmit) and stores 'dimension', which the rest of
// this file uses as the number of quadlets per event. The transmit timing
// parameters are initialised from the AMDTP_* constants and both port
// counters start at zero.
50 AmdtpTransmitStreamProcessor::AmdtpTransmitStreamProcessor(FFADODevice
&parent
, int dimension
)
51 : StreamProcessor(parent
, ePT_Transmit
)
52 , m_dimension( dimension
)
// the no-data payload switch only exists when the feature is compiled in
54 #if AMDTP_ALLOW_PAYLOAD_IN_NODATA_XMIT
55 , m_send_nodata_payload ( AMDTP_SEND_PAYLOAD_IN_NODATA_XMIT_BY_DEFAULT
)
57 , m_max_cycles_to_transmit_early ( AMDTP_MAX_CYCLES_TO_TRANSMIT_EARLY
)
58 , m_transmit_transfer_delay ( AMDTP_TRANSMIT_TRANSFER_DELAY
)
59 , m_min_cycles_before_presentation ( AMDTP_MIN_CYCLES_BEFORE_PRESENTATION
)
60 , m_nb_audio_ports( 0 )
61 , m_nb_midi_ports( 0 )
// Decide what to transmit in the cycle encoded in pkt_ctr and, when a data
// packet is due, fill in its header.
//
// The head timestamp of m_data_buffer is taken as the presentation time of
// the next block of frames; the transmit time is that value minus
// m_transmit_transfer_delay. Both are converted to cycles and compared with
// the cycle of pkt_ctr. Results visible in this listing:
//   - eCRV_Again       : not enough frames buffered, but still early enough
//   - eCRV_Packet      : header filled, enough frames remain buffered
//   - eCRV_Defer       : header filled, but the buffer is running low
//   - eCRV_EmptyPacket : the block is still too early to be sent
// NOTE(review): some lines of this function are not visible in this listing
// (braces, some declarations such as pkt_ctr and fc, and some branches); the
// comments describe only the statements that are shown.
64 enum StreamProcessor::eChildReturnValue
65 AmdtpTransmitStreamProcessor::generatePacketHeader (
66 unsigned char *data
, unsigned int *length
,
67 unsigned char *tag
, unsigned char *sy
,
70 __builtin_prefetch(data
, 1, 0); // prefetch events for write, no temporal locality
// the packet header is written in place at the start of 'data'
71 struct iec61883_packet
*packet
= (struct iec61883_packet
*)data
;
72 /* Our node ID can change after a bus reset, so it is best to fetch
73 * our node ID for each packet. */
74 packet
->sid
= m_local_node_id
;
77 packet
->dbs
= m_dimension
;
84 packet
->fmt
= IEC61883_FMT_AMDTP
;
86 *tag
= IEC61883_TAG_WITH_CIP
;
// timing of the next block of frames, in ticks and in cycles
90 uint64_t presentation_time
;
91 unsigned int presentation_cycle
;
92 int cycles_until_presentation
;
94 uint64_t transmit_at_time
;
95 unsigned int transmit_at_cycle
;
96 int cycles_until_transmit
;
98 debugOutputExtreme( DEBUG_LEVEL_ULTRA_VERBOSE
,
99 "Try for cycle %d\n", (int) CYCLE_TIMER_GET_CYCLES(pkt_ctr
) );
100 // check whether the packet buffer has packets for us to send.
101 // the base timestamp is the one of the next sample in the buffer
102 ffado_timestamp_t ts_head_tmp
;
// fetches both the head timestamp and the frame count (fc) in one call
103 m_data_buffer
->getBufferHeadTimestamp( &ts_head_tmp
, &fc
); // thread safe
105 // the timestamp gives us the time at which we want the sample block
106 // to be output by the device
107 presentation_time
= ( uint64_t ) ts_head_tmp
;
109 // now we calculate the time when we have to transmit the sample block
110 transmit_at_time
= substractTicks( presentation_time
, m_transmit_transfer_delay
);
112 // calculate the cycle this block should be presented in
113 // (this is just a virtual calculation since at that time it should
114 // already be in the device's buffer)
115 presentation_cycle
= ( unsigned int ) ( TICKS_TO_CYCLES ( presentation_time
) );
117 // calculate the cycle this block should be transmitted in
118 transmit_at_cycle
= ( unsigned int ) ( TICKS_TO_CYCLES ( transmit_at_time
) );
120 // we can check whether this cycle is within the 'window' we have
121 // to send this packet.
122 // first calculate the number of cycles left before presentation time
123 cycles_until_presentation
= diffCycles ( presentation_cycle
, CYCLE_TIMER_GET_CYCLES(pkt_ctr
) );
125 // we can check whether this cycle is within the 'window' we have
126 // to send this packet.
127 // now calculate the number of cycles left before the transmit time
128 cycles_until_transmit
= diffCycles ( transmit_at_cycle
, CYCLE_TIMER_GET_CYCLES(pkt_ctr
) );
130 // two different options:
131 // 1) there are not enough frames for one packet
132 // => determine whether this is a problem, since we might still
133 // have some time to send it
134 // 2) there are enough packets
135 // => determine whether we have to send them in this packet
// one packet carries m_syt_interval frames
136 if ( fc
< ( signed int ) m_syt_interval
)
138 // not enough frames in the buffer,
140 // we can still postpone the queueing of the packets
141 // if we are far enough ahead of the presentation time
142 if ( cycles_until_presentation
<= m_min_cycles_before_presentation
)
144 debugOutput( DEBUG_LEVEL_NORMAL
,
145 "Insufficient frames (P): N=%02d, CY=%04d, TC=%04u, CUT=%04d\n",
146 fc
, (int)CYCLE_TIMER_GET_CYCLES(pkt_ctr
),
147 transmit_at_cycle
, cycles_until_transmit
);
// current bus cycle, used only for the debug message below
154 unsigned int now_cycle
= ( unsigned int ) ( TICKS_TO_CYCLES ( m_1394service
.getCycleTimerTicks() ) );
156 debugOutputExtreme(DEBUG_LEVEL_VERBOSE
,
157 "Insufficient frames (NP): N=%02d, CY=%04d, TC=%04u, CUT=%04d, NOW=%04d\n",
158 fc
, (int)CYCLE_TIMER_GET_CYCLES(pkt_ctr
),
159 transmit_at_cycle
, cycles_until_transmit
, now_cycle
);
162 // there is still time left to send the packet
163 // we want the system to give this packet another go at a later time instant
164 return eCRV_Again
; // note that the raw1394 again system doesn't work as expected
166 // we could wait here for a certain time before trying again. However, this
167 // is not going to work since we then block the iterator thread, hence also
168 // the receiving code, meaning that we are not processing received packets,
169 // and hence there is no progression in the number of frames available.
172 // SleepRelativeUsec(125); // one cycle
173 // goto try_block_of_frames;
175 // or more advanced, calculate how many cycles we are ahead of 'now' and
176 // base the sleep on that.
178 // note that this requires that there is one thread for each IsoHandler,
179 // otherwise we're in the deadlock described above.
184 // there are enough frames, so check the time they are intended for
185 // all frames have a certain 'time window' in which they can be sent
186 // this corresponds to the range of the timestamp mechanism:
187 // we can send a packet 15 cycles in advance of the 'presentation time'
188 // in theory we can send the packet up till one cycle before the presentation time,
189 // however this is not very smart.
191 // There are 3 options:
192 // 1) the frame block is too early
193 // => send an empty packet
194 // 2) the frame block is within the window
196 // 3) the frame block is too late
197 // => discard (and raise xrun?)
198 // get next block of frames and repeat
// negative: the intended transmit cycle has already passed
200 if(cycles_until_transmit
< 0)
203 debugOutput(DEBUG_LEVEL_VERBOSE
,
204 "Too late: CY=%04d, TC=%04u, CUT=%04d, TSP=%011"PRIu64
" (%04u)\n",
205 (int)CYCLE_TIMER_GET_CYCLES(pkt_ctr
),
206 transmit_at_cycle
, cycles_until_transmit
,
207 presentation_time
, (unsigned int)TICKS_TO_CYCLES(presentation_time
) );
208 //debugShowBackLogLines(200);
209 // however, if we can send this sufficiently before the presentation
210 // time, it could be harmless.
211 // NOTE: dangerous since the device has no way of reporting that it didn't get
212 // this packet on time.
213 if(cycles_until_presentation
>= m_min_cycles_before_presentation
)
215 // we are not that late and can still try to transmit the packet
// the header helper returns the number of events sent, which advances m_dbc
216 m_dbc
+= fillDataPacketHeader(packet
, length
, presentation_time
);
217 m_last_timestamp
= presentation_time
;
// defer when fewer than two packets' worth of frames are buffered
218 return (fc
< (signed)(2*m_syt_interval
) ? eCRV_Defer
: eCRV_Packet
);
220 else // definitely too late
// not late, and within the allowed early-transmit window
225 else if(cycles_until_transmit
<= m_max_cycles_to_transmit_early
)
227 // it's time to send the packet
228 m_dbc
+= fillDataPacketHeader(packet
, length
, presentation_time
);
229 m_last_timestamp
= presentation_time
;
231 // for timestamp tracing
232 debugOutputExtreme(DEBUG_LEVEL_VERY_VERBOSE
,
233 "XMIT PKT: TSP= %011"PRIu64
" (%04u) (%04u) (%04u)\n",
235 (unsigned int)CYCLE_TIMER_GET_CYCLES(pkt_ctr
),
236 presentation_cycle
, transmit_at_cycle
);
// defer when less than one packet's worth of frames is buffered
238 return (fc
< (signed)(m_syt_interval
) ? eCRV_Defer
: eCRV_Packet
);
242 debugOutputExtreme(DEBUG_LEVEL_VERY_VERBOSE
,
243 "Too early: CY=%04u, TC=%04u, CUT=%04d, TST=%011"PRIu64
" (%04u), TSP=%011"PRId64
" (%04u)\n",
244 (int)CYCLE_TIMER_GET_CYCLES(pkt_ctr
),
245 transmit_at_cycle
, cycles_until_transmit
,
246 transmit_at_time
, (unsigned int)TICKS_TO_CYCLES(transmit_at_time
),
247 presentation_time
, (unsigned int)TICKS_TO_CYCLES(presentation_time
));
// more than one cycle beyond the early-transmit window: log it separately
249 if ( cycles_until_transmit
> m_max_cycles_to_transmit_early
+ 1 )
251 debugOutputExtreme(DEBUG_LEVEL_VERY_VERBOSE
,
252 "Way too early: CY=%04u, TC=%04u, CUT=%04d, TST=%011"PRIu64
" (%04u), TSP=%011"PRId64
"(%04u)\n",
253 (int)CYCLE_TIMER_GET_CYCLES(pkt_ctr
),
254 transmit_at_cycle
, cycles_until_transmit
,
255 transmit_at_time
, (unsigned int)TICKS_TO_CYCLES(transmit_at_time
),
256 presentation_time
, (unsigned int)TICKS_TO_CYCLES(presentation_time
));
259 // we are too early, send only an empty packet
260 return eCRV_EmptyPacket
;
// Fill the payload of a data packet: read m_syt_interval frames from
// m_data_buffer into 'data' starting 8 bytes in (presumably just past the
// two-quadlet packet header - confirm against struct iec61883_packet).
// Returns eCRV_XRun when the buffer read fails.
266 enum StreamProcessor::eChildReturnValue
267 AmdtpTransmitStreamProcessor::generatePacketData (
268 unsigned char *data
, unsigned int *length
)
270 if (m_data_buffer
->readFrames(m_syt_interval
, (char *)(data
+ 8)))
272 debugOutputExtreme(DEBUG_LEVEL_VERBOSE
,
273 "XMIT DATA: TSP= %011"PRIu64
" (%04u)\n",
275 (unsigned int)TICKS_TO_CYCLES(m_last_timestamp
));
277 // debug code to output the packet content
// walks the payload: one text row per event, one hex word per quadlet
280 quadlet_t
*tmp
= (quadlet_t
*)((char *)(data
+ 8));
282 for(int i
=0; i
<m_syt_interval
; i
++) {
283 cnt
+= snprintf(tmpbuff
+ cnt
, 8192-cnt
, "[%02d] ", i
);
284 for(int j
=0; j
<m_dimension
; j
++) {
285 cnt
+= snprintf(tmpbuff
+ cnt
, 8192-cnt
, "%08X ", *tmp
);
288 cnt
+= snprintf(tmpbuff
+ cnt
, 8192-cnt
, "\n");
290 debugOutput(DEBUG_LEVEL_VERBOSE
, "\n%s\n", tmpbuff
);
// readFrames() failed: report an xrun to the caller
294 else return eCRV_XRun
;
// Fill in the header of a "silent" packet. The header fields shown here are
// set the same way as for a data packet, after which the packet is completed
// as a no-data packet by fillNoDataPacketHeader(); that helper's return
// value is added to m_dbc.
297 enum StreamProcessor::eChildReturnValue
298 AmdtpTransmitStreamProcessor::generateSilentPacketHeader (
299 unsigned char *data
, unsigned int *length
,
300 unsigned char *tag
, unsigned char *sy
,
// the packet header is written in place at the start of 'data'
303 struct iec61883_packet
*packet
= ( struct iec61883_packet
* ) data
;
304 debugOutputExtreme(DEBUG_LEVEL_ULTRA_VERBOSE
,
305 "XMIT SILENT (cy %04d): TSP=%011"PRIu64
" (%04u)\n",
306 (int)CYCLE_TIMER_GET_CYCLES(pkt_ctr
), m_last_timestamp
,
307 (unsigned int)TICKS_TO_CYCLES(m_last_timestamp
));
309 packet
->sid
= m_local_node_id
;
312 packet
->dbs
= m_dimension
;
316 packet
->reserved
= 0;
319 packet
->fmt
= IEC61883_FMT_AMDTP
;
321 *tag
= IEC61883_TAG_WITH_CIP
;
// sets fdf/syt/length for a no-data packet
324 m_dbc
+= fillNoDataPacketHeader(packet
, length
);
// Payload stage for a silent packet: nothing is written to 'data' or
// 'length' here; the function only reports success.
328 enum StreamProcessor::eChildReturnValue
329 AmdtpTransmitStreamProcessor::generateSilentPacketData (
330 unsigned char *data
, unsigned int *length
)
332 return eCRV_OK
; // no need to do anything
// Fill in the header of an "empty" packet. The visible statements mirror
// generateSilentPacketHeader(): the same header fields are set and the
// packet is completed as a no-data packet by fillNoDataPacketHeader(),
// whose return value is added to m_dbc.
335 enum StreamProcessor::eChildReturnValue
336 AmdtpTransmitStreamProcessor::generateEmptyPacketHeader (
337 unsigned char *data
, unsigned int *length
,
338 unsigned char *tag
, unsigned char *sy
,
// the packet header is written in place at the start of 'data'
341 struct iec61883_packet
*packet
= ( struct iec61883_packet
* ) data
;
342 debugOutputExtreme(DEBUG_LEVEL_ULTRA_VERBOSE
,
343 "XMIT EMPTY (cy %04d): TSP=%011"PRIu64
" (%04u)\n",
344 (int)CYCLE_TIMER_GET_CYCLES(pkt_ctr
), m_last_timestamp
,
345 (unsigned int)TICKS_TO_CYCLES(m_last_timestamp
) );
346 packet
->sid
= m_local_node_id
;
349 packet
->dbs
= m_dimension
;
353 packet
->reserved
= 0;
356 packet
->fmt
= IEC61883_FMT_AMDTP
;
358 *tag
= IEC61883_TAG_WITH_CIP
;
// sets fdf/syt/length for a no-data packet
361 m_dbc
+= fillNoDataPacketHeader(packet
, length
);
// Payload stage for an empty packet: nothing is written to 'data' or
// 'length' here; the function only reports success.
365 enum StreamProcessor::eChildReturnValue
366 AmdtpTransmitStreamProcessor::generateEmptyPacketData (
367 unsigned char *data
, unsigned int *length
)
369 return eCRV_OK
; // no need to do anything
// Complete the header of a data packet.
// Stores the timestamp 'ts' in the packet's syt field (converted to SYT
// format and to bus byte order) and sets *length to the packet size in
// bytes. Returns m_syt_interval; the callers in this file add the return
// value to m_dbc.
372 unsigned int AmdtpTransmitStreamProcessor::fillDataPacketHeader (
373 struct iec61883_packet
*packet
, unsigned int* length
,
379 // convert the timestamp to SYT format
380 uint16_t timestamp_SYT
= TICKS_TO_SYT ( ts
);
381 packet
->syt
= CondSwapToBus16 ( timestamp_SYT
);
383 // FIXME: use a precomputed value here
// payload (events * quadlets per event * 4 bytes) plus 8 header bytes
384 *length
= m_syt_interval
*sizeof ( quadlet_t
) *m_dimension
+ 8;
386 return m_syt_interval
;
// Complete the header of a no-data packet: fdf is set to
// IEC61883_FDF_NODATA, syt to 0xffff, and *length to the packet size in
// bytes (two header quadlets, plus a dummy payload when
// m_send_nodata_payload is set).
// NOTE(review): the "DBC is not increased" remark below does not hold for
// the payload branch, which returns m_syt_interval to callers that add the
// return value to m_dbc. The returns of the other branches are not visible
// in this listing.
389 unsigned int AmdtpTransmitStreamProcessor::fillNoDataPacketHeader (
390 struct iec61883_packet
*packet
, unsigned int* length
)
392 // no-data packets have syt=0xFFFF
393 // and (can) have the usual amount of events as dummy data
394 // DBC is not increased
395 packet
->fdf
= IEC61883_FDF_NODATA
;
396 packet
->syt
= 0xffff;
398 #if AMDTP_ALLOW_PAYLOAD_IN_NODATA_XMIT
399 if ( m_send_nodata_payload
)
400 { // no-data packets with payload (NOTE: DICE-II doesn't like that)
// header (2 quadlets) + one full block of events
401 *length
= 2*sizeof ( quadlet_t
) + m_syt_interval
* m_dimension
* sizeof ( quadlet_t
);
402 return m_syt_interval
;
403 } else { // no-data packets without payload
404 *length
= 2*sizeof ( quadlet_t
);
408 // no-data packets without payload
409 *length
= 2*sizeof ( quadlet_t
);
// Select the SYT interval from the nominal sample rate reported by the
// stream processor manager. Only the switch header and the unsupported-rate
// error message are visible in this listing; the per-rate cases are not.
415 AmdtpTransmitStreamProcessor::getSytInterval() {
416 switch (m_StreamProcessorManager
.getNominalRate()) {
428 debugError("Unsupported rate: %d\n", m_StreamProcessorManager
.getNominalRate());
// Map the nominal sample rate reported by the stream processor manager to
// the matching IEC61883_FDF_SFC_* constant. Rates that are not listed reach
// the "Unsupported rate" error message; the value returned in that case is
// not visible in this listing.
434 AmdtpTransmitStreamProcessor::getFDF() {
435 switch (m_StreamProcessorManager
.getNominalRate()) {
436 case 32000: return IEC61883_FDF_SFC_32KHZ
;
437 case 44100: return IEC61883_FDF_SFC_44K1HZ
;
438 case 48000: return IEC61883_FDF_SFC_48KHZ
;
439 case 88200: return IEC61883_FDF_SFC_88K2HZ
;
440 case 96000: return IEC61883_FDF_SFC_96KHZ
;
441 case 176400: return IEC61883_FDF_SFC_176K4HZ
;
442 case 192000: return IEC61883_FDF_SFC_192KHZ
;
444 debugError("Unsupported rate: %d\n", m_StreamProcessorManager
.getNominalRate());
// Prepare the processor for streaming: derive m_syt_interval from the
// nominal rate, log the transmit configuration, and build the port cache.
// NOTE(review): the assignment of m_fdf, the statement that uses the
// nominal rate near the end, and the return statements are not visible in
// this listing.
449 bool AmdtpTransmitStreamProcessor::prepareChild()
451 debugOutput ( DEBUG_LEVEL_VERBOSE
, "Preparing (%p)...\n", this );
452 m_syt_interval
= getSytInterval();
455 debugOutput ( DEBUG_LEVEL_VERBOSE
, " SYT interval / FDF : %d / %d\n", m_syt_interval
, m_fdf
);
456 #if AMDTP_ALLOW_PAYLOAD_IN_NODATA_XMIT
457 debugOutput ( DEBUG_LEVEL_VERBOSE
, " Send payload in No-Data packets: %s \n", m_send_nodata_payload
?"Yes":"No" );
459 debugOutput ( DEBUG_LEVEL_VERBOSE
, " Max early transmit cycles : %d\n", m_max_cycles_to_transmit_early
);
460 debugOutput ( DEBUG_LEVEL_VERBOSE
, " Transfer delay : %d\n", m_transmit_transfer_delay
);
461 debugOutput ( DEBUG_LEVEL_VERBOSE
, " Min cycles before presentation : %d\n", m_min_cycles_before_presentation
);
467 m_StreamProcessorManager
.getNominalRate(),
// cache the audio and MIDI ports for the encode routines
471 if (!initPortCache()) {
472 debugError("Could not init port cache\n");
480 * compose the event streams for the packets from the port buffers
// Encode 'nevents' events from the port buffers into 'data', reading the
// ports from 'offset' onward. The audio encoder is chosen by the audio data
// type configured in the stream processor manager (24-bit integer or
// float); MIDI ports are encoded afterwards.
// NOTE(review): the port-cache update announced by the first comment, the
// break statements and the return value are not visible in this listing.
482 bool AmdtpTransmitStreamProcessor::processWriteBlock ( char *data
,
483 unsigned int nevents
, unsigned int offset
)
485 // update the variable parts of the cache
489 switch(m_StreamProcessorManager
.getAudioDataType()) {
490 case StreamProcessorManager::eADT_Int24
:
491 encodeAudioPortsInt24((quadlet_t
*)data
, offset
, nevents
);
493 case StreamProcessorManager::eADT_Float
:
494 encodeAudioPortsFloat((quadlet_t
*)data
, offset
, nevents
);
499 encodeMidiPorts((quadlet_t
*)data
, offset
, nevents
);
// Fill 'nevents' events in 'data' with silence: every audio channel gets an
// empty labelled sample and every MIDI slot a "no data" event.
504 AmdtpTransmitStreamProcessor::transmitSilenceBlock(
505 char *data
, unsigned int nevents
, unsigned int offset
)
507 // no need to update the port cache when transmitting silence since
508 // no dynamic values are used to do so.
509 encodeAudioPortsSilence((quadlet_t
*)data
, offset
, nevents
);
510 encodeMidiPortsSilence((quadlet_t
*)data
, offset
, nevents
);
515 * @brief encodes all audio ports in the cache to events (silent data)
// Write a silent sample into every audio channel of 'nevents' events.
// Channel i occupies quadlet i of each event and consecutive events are
// m_dimension quadlets apart.
521 AmdtpTransmitStreamProcessor::encodeAudioPortsSilence(quadlet_t
*data
,
523 unsigned int nevents
)
526 quadlet_t
*target_event
;
529 for (i
= 0; i
< m_nb_audio_ports
; i
++) {
// first event's slot for channel i
530 target_event
= (quadlet_t
*)(data
+ i
);
532 for (j
= 0;j
< nevents
; j
+= 1)
// 0x40000000 is the sample label used throughout this file, with a zero sample value
534 *target_event
= CONDSWAPTOBUS32_CONST(0x40000000);
535 target_event
+= m_dimension
;
541 #include <emmintrin.h>
543 // There's no obvious reason to warn about this anymore - jwoithe.
544 // #warning SSE2 build
547 * @brief mux all audio ports to events
// SSE2 variant: convert float samples from the cached audio ports into
// labelled 24-bit events in 'data'.
// Ports are first handled four at a time (four adjacent channels of one
// event per 128-bit store); the ports left over are handled one at a time,
// four events per conversion. Each sample is scaled by
// AMDTP_FLOAT_MULTIPLIER, truncated to an integer, masked to 24 bits,
// OR-ed with the 0x40000000 label and byte-swapped to bus order.
// NOTE(review): several lines (declarations, pointer increments, braces,
// preprocessor lines) are not visible in this listing; comments describe
// only the statements that are shown.
553 AmdtpTransmitStreamProcessor::encodeAudioPortsFloat(quadlet_t
*data
,
555 unsigned int nevents
)
558 quadlet_t
*target_event
;
561 float * client_buffers
[4];
562 float tmp_values
[4] __attribute__ ((aligned (16)));
563 uint32_t tmp_values_int
[4] __attribute__ ((aligned (16)));
565 // prepare the scratch buffer
566 assert(m_scratch_buffer_size_bytes
> nevents
* 4);
567 memset(m_scratch_buffer
, 0, nevents
* 4);
// per-lane constants: event label, 24-bit sample mask, float-to-int scale
569 const __m128i label
= _mm_set_epi32 (0x40000000, 0x40000000, 0x40000000, 0x40000000);
570 const __m128i mask
= _mm_set_epi32 (0x00FFFFFF, 0x00FFFFFF, 0x00FFFFFF, 0x00FFFFFF);
571 const __m128 mult
= _mm_set_ps(AMDTP_FLOAT_MULTIPLIER
, AMDTP_FLOAT_MULTIPLIER
, AMDTP_FLOAT_MULTIPLIER
, AMDTP_FLOAT_MULTIPLIER
);
573 #if AMDTP_CLIP_FLOATS
574 const __m128 v_max
= _mm_set_ps(1.0, 1.0, 1.0, 1.0);
575 const __m128 v_min
= _mm_set_ps(-1.0, -1.0, -1.0, -1.0);
578 // this assumes that audio ports are sorted by position,
579 // and that there are no gaps
// the strict '<' always leaves the last one to four ports for the loop further down
580 for (i
= 0; i
< ((int)m_nb_audio_ports
)-4; i
+= 4) {
581 struct _MBLA_port_cache
*p
;
583 // get the port buffers
584 for (j
=0; j
<4; j
++) {
585 p
= &(m_audio_ports
.at(i
+j
));
586 if(likely(p
->buffer
&& p
->enabled
)) {
587 client_buffers
[j
] = (float *) p
->buffer
;
588 client_buffers
[j
] += offset
;
590 // if a port is disabled or has no valid
591 // buffer, use the scratch buffer (all zeros)
592 client_buffers
[j
] = (float *) m_scratch_buffer
;
596 // the base event for this position
597 target_event
= (quadlet_t
*)(data
+ i
);
598 // process the events
599 for (j
=0;j
< nevents
; j
+= 1)
// gather one sample from each of the four ports
602 tmp_values
[0] = *(client_buffers
[0]);
603 tmp_values
[1] = *(client_buffers
[1]);
604 tmp_values
[2] = *(client_buffers
[2]);
605 tmp_values
[3] = *(client_buffers
[3]);
607 // now do the SSE based conversion/labeling
608 __m128 v_float
= *((__m128
*)tmp_values
);
609 __m128i
*target
= (__m128i
*)target_event
;
613 #if AMDTP_CLIP_FLOATS
// clamp to [-1.0, 1.0] before scaling
615 v_float
= _mm_max_ps(v_float
, v_min
);
616 v_float
= _mm_min_ps(v_float
, v_max
);
620 v_float
= _mm_mul_ps(v_float
, mult
);
621 // convert to signed integer
622 v_int
= _mm_cvttps_epi32( v_float
);
// keep the low 24 bits and add the label
624 v_int
= _mm_and_si128( v_int
, mask
);
626 v_int
= _mm_or_si128( v_int
, label
);
628 // do endian conversion (SSE is always little endian)
// step 1: swap the two bytes inside every 16-bit lane
630 v_int
= _mm_or_si128( _mm_slli_epi16( v_int
, 8 ), _mm_srli_epi16( v_int
, 8 ) );
// step 2: swap the two 16-bit halves of every 32-bit lane
632 v_int
= _mm_or_si128( _mm_slli_epi32( v_int
, 16 ), _mm_srli_epi32( v_int
, 16 ) );
633 // store the packed int
634 // (target misalignment is assumed since we don't know the m_dimension)
635 _mm_storeu_si128 (target
, v_int
);
637 // increment the buffer pointers
643 // go to next target event position
644 target_event
+= m_dimension
;
648 // do remaining ports
649 // NOTE: these can be time-SSE'd
650 for (; i
< (int)m_nb_audio_ports
; i
++) {
651 struct _MBLA_port_cache
&p
= m_audio_ports
.at(i
);
652 target_event
= (quadlet_t
*)(data
+ i
);
654 assert(nevents
+ offset
<= p
.buffer_size
);
657 if(likely(p
.buffer
&& p
.enabled
)) {
658 float *buffer
= (float *)(p
.buffer
);
// NOTE(review): this loop runs while j < nevents in steps of 4, so a
// trailing group of fewer than four events is processed as a full group
// and the remainder loop below can never execute - confirm that nevents
// is always a multiple of 4.
661 for (j
= 0;j
< nevents
; j
+= 4)
664 tmp_values
[0] = *buffer
;
666 tmp_values
[1] = *buffer
;
668 tmp_values
[2] = *buffer
;
670 tmp_values
[3] = *buffer
;
673 // now do the SSE based conversion/labeling
674 __m128 v_float
= *((__m128
*)tmp_values
);
677 #if AMDTP_CLIP_FLOATS
679 v_float
= _mm_max_ps(v_float
, v_min
);
680 v_float
= _mm_min_ps(v_float
, v_max
);
683 v_float
= _mm_mul_ps(v_float
, mult
);
684 // convert to signed integer
685 v_int
= _mm_cvttps_epi32( v_float
);
687 v_int
= _mm_and_si128( v_int
, mask
);
689 v_int
= _mm_or_si128( v_int
, label
);
691 // do endian conversion (SSE is always little endian)
693 v_int
= _mm_or_si128( _mm_slli_epi16( v_int
, 8 ), _mm_srli_epi16( v_int
, 8 ) );
695 v_int
= _mm_or_si128( _mm_slli_epi32( v_int
, 16 ), _mm_srli_epi32( v_int
, 16 ) );
697 // store the packed int
698 _mm_store_si128 ((__m128i
*)(&tmp_values_int
), v_int
);
700 // scatter the four converted samples to four consecutive events
701 *target_event
= tmp_values_int
[0];
702 target_event
+= m_dimension
;
703 *target_event
= tmp_values_int
[1];
704 target_event
+= m_dimension
;
705 *target_event
= tmp_values_int
[2];
706 target_event
+= m_dimension
;
707 *target_event
= tmp_values_int
[3];
708 target_event
+= m_dimension
;
711 // do the remainder of the events
712 for(;j
< nevents
; j
+= 1) {
713 float *in
= (float *)buffer
;
714 #if AMDTP_CLIP_FLOATS
715 // clip directly to the value of a maxed event
716 if(unlikely(*in
> 1.0)) {
717 *target_event
= CONDSWAPTOBUS32_CONST(0x407FFFFF);
718 } else if(unlikely(*in
< -1.0)) {
719 *target_event
= CONDSWAPTOBUS32_CONST(0x40800001);
// in range: scale, truncate, mask to 24 bits and label
721 float v
= (*in
) * AMDTP_FLOAT_MULTIPLIER
;
722 unsigned int tmp
= ((int) v
);
723 tmp
= ( tmp
& 0x00FFFFFF ) | 0x40000000;
724 *target_event
= CondSwapToBus32((quadlet_t
)tmp
);
// same conversion without clipping
727 float v
= (*in
) * AMDTP_FLOAT_MULTIPLIER
;
728 unsigned int tmp
= ((int) v
);
729 tmp
= ( tmp
& 0x00FFFFFF ) | 0x40000000;
730 *target_event
= CondSwapToBus32((quadlet_t
)tmp
);
733 target_event
+= m_dimension
;
// silent events for this channel
737 for (j
= 0;j
< nevents
; j
+= 1)
739 // hardcoded byte swapped
// (0x40000000, i.e. label plus zero sample, with its bytes reversed)
740 *target_event
= 0x00000040;
741 target_event
+= m_dimension
;
749 * @brief mux all audio ports to events
// SSE2 variant: convert integer samples from the cached audio ports into
// labelled 24-bit events in 'data'.
// Ports are first handled four at a time (four adjacent channels of one
// event per 128-bit store); the ports left over are handled one at a time,
// four events per conversion. Each sample is masked to 24 bits, OR-ed with
// the 0x40000000 label and byte-swapped to bus order.
// NOTE(review): several lines (declarations, pointer increments, braces,
// preprocessor lines) are not visible in this listing; comments describe
// only the statements that are shown.
755 AmdtpTransmitStreamProcessor::encodeAudioPortsInt24(quadlet_t
*data
,
757 unsigned int nevents
)
760 quadlet_t
*target_event
;
763 uint32_t *client_buffers
[4];
764 uint32_t tmp_values
[4] __attribute__ ((aligned (16)));
766 // prepare the scratch buffer
767 assert(m_scratch_buffer_size_bytes
> nevents
* 4);
768 memset(m_scratch_buffer
, 0, nevents
* 4);
// per-lane constants: event label and 24-bit sample mask
770 const __m128i label
= _mm_set_epi32 (0x40000000, 0x40000000, 0x40000000, 0x40000000);
771 const __m128i mask
= _mm_set_epi32 (0x00FFFFFF, 0x00FFFFFF, 0x00FFFFFF, 0x00FFFFFF);
773 // this assumes that audio ports are sorted by position,
774 // and that there are no gaps
// the strict '<' always leaves the last one to four ports for the loop further down
775 for (i
= 0; i
< ((int)m_nb_audio_ports
)-4; i
+= 4) {
776 struct _MBLA_port_cache
*p
;
778 // get the port buffers
779 for (j
=0; j
<4; j
++) {
780 p
= &(m_audio_ports
.at(i
+j
));
781 if(likely(p
->buffer
&& p
->enabled
)) {
782 client_buffers
[j
] = (uint32_t *) p
->buffer
;
783 client_buffers
[j
] += offset
;
785 // if a port is disabled or has no valid
786 // buffer, use the scratch buffer (all zeros)
787 client_buffers
[j
] = (uint32_t *) m_scratch_buffer
;
791 // the base event for this position
792 target_event
= (quadlet_t
*)(data
+ i
);
794 // process the events
795 for (j
=0;j
< nevents
; j
+= 1)
// gather one sample from each of the four ports
798 tmp_values
[0] = *(client_buffers
[0]);
799 tmp_values
[1] = *(client_buffers
[1]);
800 tmp_values
[2] = *(client_buffers
[2]);
801 tmp_values
[3] = *(client_buffers
[3]);
803 // now do the SSE based conversion/labeling
804 __m128i
*target
= (__m128i
*)target_event
;
805 __m128i v_int
= *((__m128i
*)tmp_values
);;
// keep the low 24 bits and add the label
808 v_int
= _mm_and_si128( v_int
, mask
);
810 v_int
= _mm_or_si128( v_int
, label
);
812 // do endian conversion (SSE is always little endian)
// step 1: swap the two bytes inside every 16-bit lane
814 v_int
= _mm_or_si128( _mm_slli_epi16( v_int
, 8 ), _mm_srli_epi16( v_int
, 8 ) );
// step 2: swap the two 16-bit halves of every 32-bit lane
816 v_int
= _mm_or_si128( _mm_slli_epi32( v_int
, 16 ), _mm_srli_epi32( v_int
, 16 ) );
818 // store the packed int
819 // (target misalignment is assumed since we don't know the m_dimension)
820 _mm_storeu_si128 (target
, v_int
);
822 // increment the buffer pointers
828 // go to next target event position
829 target_event
+= m_dimension
;
833 // do remaining ports
834 // NOTE: these can be time-SSE'd
835 for (; i
< ((int)m_nb_audio_ports
); i
++) {
836 struct _MBLA_port_cache
&p
= m_audio_ports
.at(i
);
837 target_event
= (quadlet_t
*)(data
+ i
);
839 assert(nevents
+ offset
<= p
.buffer_size
);
842 if(likely(p
.buffer
&& p
.enabled
)) {
843 uint32_t *buffer
= (uint32_t *)(p
.buffer
);
// NOTE(review): this loop runs while j < nevents in steps of 4, so a
// trailing group of fewer than four events is processed as a full group
// and the remainder loop below can never execute - confirm that nevents
// is always a multiple of 4.
846 for (j
= 0;j
< nevents
; j
+= 4)
849 tmp_values
[0] = *buffer
;
851 tmp_values
[1] = *buffer
;
853 tmp_values
[2] = *buffer
;
855 tmp_values
[3] = *buffer
;
858 // now do the SSE based conversion/labeling
859 __m128i v_int
= *((__m128i
*)tmp_values
);;
862 v_int
= _mm_and_si128( v_int
, mask
);
864 v_int
= _mm_or_si128( v_int
, label
);
866 // do endian conversion (SSE is always little endian)
868 v_int
= _mm_or_si128( _mm_slli_epi16( v_int
, 8 ), _mm_srli_epi16( v_int
, 8 ) );
870 v_int
= _mm_or_si128( _mm_slli_epi32( v_int
, 16 ), _mm_srli_epi32( v_int
, 16 ) );
872 // store the packed int
// (tmp_values is reused as the output staging area)
873 _mm_store_si128 ((__m128i
*)(&tmp_values
), v_int
);
875 // scatter the four converted samples to four consecutive events
876 *target_event
= tmp_values
[0];
877 target_event
+= m_dimension
;
878 *target_event
= tmp_values
[1];
879 target_event
+= m_dimension
;
880 *target_event
= tmp_values
[2];
881 target_event
+= m_dimension
;
882 *target_event
= tmp_values
[3];
883 target_event
+= m_dimension
;
886 // do the remainder of the events
887 for(;j
< nevents
; j
+= 1) {
888 uint32_t in
= (uint32_t)(*buffer
);
889 *target_event
= CondSwapToBus32((quadlet_t
)((in
& 0x00FFFFFF) | 0x40000000));
891 target_event
+= m_dimension
;
// silent events for this channel
895 for (j
= 0;j
< nevents
; j
+= 1)
897 // hardcoded byte swapped
// (0x40000000, i.e. label plus zero sample, with its bytes reversed)
898 *target_event
= 0x00000040;
899 target_event
+= m_dimension
;
908 * @brief mux all audio ports to events
// Scalar variant: convert integer samples from the cached audio ports into
// labelled 24-bit events in 'data'. Channel i occupies quadlet i of each
// event; consecutive events are m_dimension quadlets apart. Ports that are
// disabled or have no buffer are filled with silent events.
914 AmdtpTransmitStreamProcessor::encodeAudioPortsInt24(quadlet_t
*data
,
916 unsigned int nevents
)
919 quadlet_t
*target_event
;
922 for (i
= 0; i
< m_nb_audio_ports
; i
++) {
923 struct _MBLA_port_cache
&p
= m_audio_ports
.at(i
);
// first event's slot for channel i
924 target_event
= (quadlet_t
*)(data
+ i
);
926 assert(nevents
+ offset
<= p
.buffer_size
);
929 if(likely(p
.buffer
&& p
.enabled
)) {
930 quadlet_t
*buffer
= (quadlet_t
*)(p
.buffer
);
933 for (j
= 0;j
< nevents
; j
+= 1)
// keep the low 24 bits, add the label, convert to bus byte order
935 uint32_t in
= (uint32_t)(*buffer
);
936 *target_event
= CondSwapToBus32((quadlet_t
)((in
& 0x00FFFFFF) | 0x40000000));
938 target_event
+= m_dimension
;
// silent events for this channel
941 for (j
= 0;j
< nevents
; j
+= 1)
943 *target_event
= CONDSWAPTOBUS32_CONST(0x40000000);
944 target_event
+= m_dimension
;
951 * @brief mux all audio ports to events
// Scalar variant: convert float samples from the cached audio ports into
// labelled 24-bit events in 'data'. Each sample is scaled by
// AMDTP_FLOAT_MULTIPLIER, truncated to an integer, masked to 24 bits and
// OR-ed with the 0x40000000 label. With AMDTP_CLIP_FLOATS, samples outside
// [-1.0, 1.0] are replaced by fixed extreme events. Ports that are disabled
// or have no buffer are filled with silent events.
957 AmdtpTransmitStreamProcessor::encodeAudioPortsFloat(quadlet_t
*data
,
959 unsigned int nevents
)
962 quadlet_t
*target_event
;
965 for (i
= 0; i
< m_nb_audio_ports
; i
++) {
966 struct _MBLA_port_cache
&p
= m_audio_ports
.at(i
);
// first event's slot for channel i
967 target_event
= (quadlet_t
*)(data
+ i
);
969 assert(nevents
+ offset
<= p
.buffer_size
);
972 if(likely(p
.buffer
&& p
.enabled
)) {
973 quadlet_t
*buffer
= (quadlet_t
*)(p
.buffer
);
976 for (j
= 0;j
< nevents
; j
+= 1)
// reinterpret the quadlet in the port buffer as a float sample
978 float *in
= (float *)buffer
;
979 #if AMDTP_CLIP_FLOATS
980 // clip directly to the value of a maxed event
981 if(unlikely(*in
> 1.0)) {
982 *target_event
= CONDSWAPTOBUS32_CONST(0x407FFFFF);
983 } else if(unlikely(*in
< -1.0)) {
984 *target_event
= CONDSWAPTOBUS32_CONST(0x40800001);
// in range: scale, truncate, mask to 24 bits and label
986 float v
= (*in
) * AMDTP_FLOAT_MULTIPLIER
;
987 unsigned int tmp
= ((int) v
);
988 tmp
= ( tmp
& 0x00FFFFFF ) | 0x40000000;
989 *target_event
= CondSwapToBus32((quadlet_t
)tmp
);
// same conversion without clipping
992 float v
= (*in
) * AMDTP_FLOAT_MULTIPLIER
;
993 unsigned int tmp
= ((int) v
);
994 tmp
= ( tmp
& 0x00FFFFFF ) | 0x40000000;
995 *target_event
= CondSwapToBus32((quadlet_t
)tmp
);
998 target_event
+= m_dimension
;
// silent events for this channel
1001 for (j
= 0;j
< nevents
; j
+= 1)
1003 *target_event
= CONDSWAPTOBUS32_CONST(0x40000000);
1004 target_event
+= m_dimension
;
1012 * @brief encodes all midi ports in the cache to events (silence)
// Write a MIDI "no data" event into every slot that belongs to a cached
// MIDI port. A port owns quadlet p.position of every 8th event, starting at
// event p.location.
1018 AmdtpTransmitStreamProcessor::encodeMidiPortsSilence(quadlet_t
*data
,
1019 unsigned int offset
,
1020 unsigned int nevents
)
1022 quadlet_t
*target_event
;
1026 for (i
= 0; i
< m_nb_midi_ports
; i
++) {
1027 struct _MIDI_port_cache
&p
= m_midi_ports
.at(i
);
1029 for (j
= p
.location
;j
< nevents
; j
+= 8) {
// event j, quadlet p.position
1030 target_event
= (quadlet_t
*) (data
+ ((j
* m_dimension
) + p
.position
));
1031 *target_event
= CondSwapToBus32(IEC61883_AM824_SET_LABEL(0, IEC61883_AM824_LABEL_MIDI_NO_DATA
));
1037 * @brief encodes all midi ports in the cache to events
// Encode the cached MIDI ports into 'data'. A port owns quadlet p.position
// of every 8th event, starting at event p.location. For an enabled port
// with a buffer, a buffer entry whose top byte is non-zero carries one MIDI
// byte in its low 8 bits; that byte is sent with the MIDI_1X label. All
// other slots, and every slot of a disabled port, get a "no data" event.
// NOTE(review): the lines that declare tmpval and advance 'buffer' are not
// visible in this listing.
1043 AmdtpTransmitStreamProcessor::encodeMidiPorts(quadlet_t
*data
,
1044 unsigned int offset
,
1045 unsigned int nevents
)
1047 quadlet_t
*target_event
;
1051 for (i
= 0; i
< m_nb_midi_ports
; i
++) {
1052 struct _MIDI_port_cache
&p
= m_midi_ports
.at(i
);
1053 if (p
.buffer
&& p
.enabled
) {
1054 uint32_t *buffer
= (quadlet_t
*)(p
.buffer
);
1057 for (j
= p
.location
;j
< nevents
; j
+= 8) {
// event j, quadlet p.position
1058 target_event
= (quadlet_t
*) (data
+ ((j
* m_dimension
) + p
.position
));
1060 if ( *buffer
& 0xFF000000 ) // we can send a byte
// move the MIDI byte (low 8 bits) to bits 16..23 of the event
1063 tmpval
= ((*buffer
)<<16) & 0x00FF0000;
1064 tmpval
= IEC61883_AM824_SET_LABEL(tmpval
, IEC61883_AM824_LABEL_MIDI_1X
);
1065 *target_event
= CondSwapToBus32(tmpval
);
1067 debugOutputExtreme( DEBUG_LEVEL_VERBOSE
, "MIDI port %s, pos=%u, loc=%u, nevents=%u, dim=%d\n",
1068 p
.port
->getName().c_str(), p
.position
, p
.location
, nevents
, m_dimension
);
1069 debugOutputExtreme( DEBUG_LEVEL_VERBOSE
, "base=%p, target=%p, value=%08X\n",
1070 data
, target_event
, tmpval
);
1072 // can't send a byte, either because there is no byte,
1073 // or because this would exceed the maximum rate
1074 // FIXME: this can be ifdef optimized since it's a constant
1075 *target_event
= CondSwapToBus32(IEC61883_AM824_SET_LABEL(0, IEC61883_AM824_LABEL_MIDI_NO_DATA
));
// port disabled or without buffer: fill all of its slots with "no data"
1080 for (j
= p
.location
;j
< nevents
; j
+= 8) {
1081 target_event
= (quadlet_t
*)(data
+ ((j
* m_dimension
) + p
.position
));
1082 __builtin_prefetch(target_event
, 1, 0); // prefetch events for write, no temporal locality
1083 *target_event
= CondSwapToBus32(IEC61883_AM824_SET_LABEL(0, IEC61883_AM824_LABEL_MIDI_NO_DATA
));
// Rebuild the audio and MIDI port caches from m_Ports.
// Pass 1 walks all ports and switches on their AMDTP format.
// Pass 2 fills m_audio_ports so that entry idx is the audio port at
// position idx.
// Pass 3 fills m_midi_ports with every port whose type is Port::E_Midi,
// recording its position and location.
// Buffer addresses are left NULL here and filled in by updatePortCache().
// NOTE(review): the bodies of the pass-1 switch cases, the iterator
// increments, the error returns and the final return are not visible in
// this listing.
1090 AmdtpTransmitStreamProcessor::initPortCache() {
1091 // make use of the fact that audio ports are the first ports in
1092 // the cluster as per AMDTP. so we can sort the ports by position
1093 // and have very efficient lookups:
1094 // m_float_ports.at(i).buffer -> audio stream i buffer
1095 // for midi ports we simply cache all port info since they are (usually) not
// start from empty caches
1097 m_nb_audio_ports
= 0;
1098 m_audio_ports
.clear();
1100 m_nb_midi_ports
= 0;
1101 m_midi_ports
.clear();
// pass 1: classify every port by its AMDTP format
1103 for(PortVectorIterator it
= m_Ports
.begin();
1104 it
!= m_Ports
.end();
1107 AmdtpPortInfo
*pinfo
=dynamic_cast<AmdtpPortInfo
*>(*it
);
1108 assert(pinfo
); // this should not fail!!
1110 switch( pinfo
->getFormat() )
1112 case AmdtpPortInfo::E_MBLA
:
1115 case AmdtpPortInfo::E_SPDIF
: // still unimplemented
1117 case AmdtpPortInfo::E_Midi
:
// pass 2: for each audio position, search all ports for the one at that position
1126 for (idx
= 0; idx
< m_nb_audio_ports
; idx
++) {
1127 for(PortVectorIterator it
= m_Ports
.begin();
1128 it
!= m_Ports
.end();
1131 AmdtpPortInfo
*pinfo
=dynamic_cast<AmdtpPortInfo
*>(*it
);
1132 debugOutput(DEBUG_LEVEL_VERY_VERBOSE
,
1133 "idx %u: looking at port %s at position %u\n",
1134 idx
, (*it
)->getName().c_str(), pinfo
->getPosition());
1135 if(pinfo
->getPosition() == (unsigned int)idx
) {
1136 struct _MBLA_port_cache p
;
1137 p
.port
= dynamic_cast<AmdtpAudioPort
*>(*it
);
1138 if(p
.port
== NULL
) {
1139 debugError("Port is not an AmdtpAudioPort!\n");
1142 p
.buffer
= NULL
; // to be filled by updatePortCache
1144 p
.buffer_size
= (*it
)->getBufferSize();
1147 m_audio_ports
.push_back(p
);
1148 debugOutput(DEBUG_LEVEL_VERBOSE
,
1149 "Cached port %s at position %u\n",
1150 p
.port
->getName().c_str(), idx
);
1154 debugError("No MBLA port found for position %d\n", idx
);
// pass 3: cache every MIDI port together with its position and location
1160 for(PortVectorIterator it
= m_Ports
.begin();
1161 it
!= m_Ports
.end();
1164 AmdtpPortInfo
*pinfo
=dynamic_cast<AmdtpPortInfo
*>(*it
);
1165 debugOutput(DEBUG_LEVEL_VERY_VERBOSE
,
1166 "idx %u: looking at port %s at position %u, location %u\n",
1167 idx
, (*it
)->getName().c_str(), pinfo
->getPosition(), pinfo
->getLocation());
1168 if ((*it
)->getPortType() == Port::E_Midi
) {
1169 struct _MIDI_port_cache p
;
1170 p
.port
= dynamic_cast<AmdtpMidiPort
*>(*it
);
1171 if(p
.port
== NULL
) {
1172 debugError("Port is not an AmdtpMidiPort!\n");
1175 p
.position
= pinfo
->getPosition();
1176 p
.location
= pinfo
->getLocation();
1177 p
.buffer
= NULL
; // to be filled by updatePortCache
1179 p
.buffer_size
= (*it
)->getBufferSize();
1182 m_midi_ports
.push_back(p
);
1183 debugOutput(DEBUG_LEVEL_VERBOSE
,
1184 "Cached port %s at position %u, location %u\n",
1185 p
.port
->getName().c_str(), p
.position
, p
.location
);
1192 //FIXME: DRY. Needs to be refactored with AmdtpReceiveStreamProcessor
// Refresh the per-port values that can change between blocks: for every
// cached audio and MIDI port, re-read the buffer address, the enabled state
// (the inverse of isDisabled()) and the buffer size from the port object.
1194 AmdtpTransmitStreamProcessor::updatePortCache() {
// audio ports
1196 for (idx
= 0; idx
< m_nb_audio_ports
; idx
++) {
1197 struct _MBLA_port_cache
& p
= m_audio_ports
.at(idx
);
1198 AmdtpAudioPort
*port
= p
.port
;
1199 p
.buffer
= port
->getBufferAddress();
1200 p
.enabled
= !port
->isDisabled();
1202 p
.buffer_size
= port
->getBufferSize();
// MIDI ports
1205 for (idx
= 0; idx
< m_nb_midi_ports
; idx
++) {
1206 struct _MIDI_port_cache
& p
= m_midi_ports
.at(idx
);
1207 AmdtpMidiPort
*port
= p
.port
;
1208 p
.buffer
= port
->getBufferAddress();
1209 p
.enabled
= !port
->isDisabled();
1211 p
.buffer_size
= port
->getBufferSize();
1216 } // end of namespace Streaming