/*
 * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <errno.h>
#include <gpxe/if_arp.h>
#include <gpxe/iobuf.h>
#include <gpxe/netdevice.h>
#include <gpxe/infiniband.h>
#include <gpxe/ipoib.h>
/** IPoIB MTU; also the size of each receive buffer that is allocated */
#define IPOIB_MTU 2048

/** Number of IPoIB data send work queue entries */
#define IPOIB_DATA_NUM_SEND_WQES 2

/** Number of IPoIB data receive work queue entries */
#define IPOIB_DATA_NUM_RECV_WQES 4

/** Number of IPoIB data completion entries */
#define IPOIB_DATA_NUM_CQES 8

/** Number of IPoIB metadata send work queue entries */
#define IPOIB_META_NUM_SEND_WQES 2

/** Number of IPoIB metadata receive work queue entries */
#define IPOIB_META_NUM_RECV_WQES 2

/** Number of IPoIB metadata completion entries */
#define IPOIB_META_NUM_CQES 8
/** An IPoIB queue set */
struct ipoib_queue_set {
	/** Completion queue */
	struct ib_completion_queue *cq;
	/** Queue pair */
	struct ib_queue_pair *qp;
	/** Receive work queue fill level */
	unsigned int recv_fill;
	/** Receive work queue maximum fill level */
	unsigned int recv_max_fill;
};
70 /** An IPoIB device */
73 struct net_device
*netdev
;
74 /** Underlying Infiniband device */
75 struct ib_device
*ibdev
;
77 struct ipoib_queue_set data
;
79 struct ipoib_queue_set meta
;
81 struct ib_gid broadcast_gid
;
83 unsigned int broadcast_lid
;
84 /** Joined to broadcast group */
87 unsigned long data_qkey
;
91 * IPoIB path cache entry
93 * This serves a similar role to the ARP cache for Ethernet. (ARP
94 * *is* used on IPoIB; we have two caches to maintain.)
96 struct ipoib_cached_path
{
97 /** Destination GID */
99 /** Destination LID */
107 /** Number of IPoIB path cache entries */
108 #define IPOIB_NUM_CACHED_PATHS 2
110 /** IPoIB path cache */
111 static struct ipoib_cached_path ipoib_path_cache
[IPOIB_NUM_CACHED_PATHS
];
113 /** Oldest IPoIB path cache entry index */
114 static unsigned int ipoib_path_cache_idx
= 0;
116 /** TID half used to identify get path record replies */
117 #define IPOIB_TID_GET_PATH_REC 0x11111111UL
119 /** TID half used to identify multicast member record replies */
120 #define IPOIB_TID_MC_MEMBER_REC 0x22222222UL
122 /** IPoIB metadata TID */
123 static uint32_t ipoib_meta_tid
= 0;
125 /** IPv4 broadcast GID */
126 static const struct ib_gid ipv4_broadcast_gid
= {
127 { { 0xff, 0x12, 0x40, 0x1b, 0x00, 0x00, 0x00, 0x00,
128 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff } }
131 /** Maximum time we will wait for the broadcast join to succeed */
132 #define IPOIB_JOIN_MAX_DELAY_MS 1000
/****************************************************************************
 *
 * IPoIB link layer
 *
 ****************************************************************************
 */
141 /** Broadcast QPN used in IPoIB MAC addresses
143 * This is a guaranteed invalid real QPN
145 #define IPOIB_BROADCAST_QPN 0xffffffffUL
147 /** Broadcast IPoIB address */
148 static struct ipoib_mac ipoib_broadcast
= {
149 .qpn
= ntohl ( IPOIB_BROADCAST_QPN
),
153 * Transmit IPoIB packet
155 * @v iobuf I/O buffer
156 * @v netdev Network device
157 * @v net_protocol Network-layer protocol
158 * @v ll_dest Link-layer destination address
160 * Prepends the IPoIB link-layer header and transmits the packet.
162 static int ipoib_tx ( struct io_buffer
*iobuf
, struct net_device
*netdev
,
163 struct net_protocol
*net_protocol
,
164 const void *ll_dest
) {
165 struct ipoib_hdr
*ipoib_hdr
=
166 iob_push ( iobuf
, sizeof ( *ipoib_hdr
) );
168 /* Build IPoIB header */
169 memcpy ( &ipoib_hdr
->pseudo
.peer
, ll_dest
,
170 sizeof ( ipoib_hdr
->pseudo
.peer
) );
171 ipoib_hdr
->real
.proto
= net_protocol
->net_proto
;
172 ipoib_hdr
->real
.reserved
= 0;
174 /* Hand off to network device */
175 return netdev_tx ( netdev
, iobuf
);
179 * Process received IPoIB packet
181 * @v iobuf I/O buffer
182 * @v netdev Network device
184 * Strips off the IPoIB link-layer header and passes up to the
185 * network-layer protocol.
187 static int ipoib_rx ( struct io_buffer
*iobuf
, struct net_device
*netdev
) {
188 struct ipoib_hdr
*ipoib_hdr
= iobuf
->data
;
191 if ( iob_len ( iobuf
) < sizeof ( *ipoib_hdr
) ) {
192 DBG ( "IPoIB packet too short for link-layer header\n" );
193 DBG_HD ( iobuf
->data
, iob_len ( iobuf
) );
198 /* Strip off IPoIB header */
199 iob_pull ( iobuf
, sizeof ( *ipoib_hdr
) );
201 /* Hand off to network-layer protocol */
202 return net_rx ( iobuf
, netdev
, ipoib_hdr
->real
.proto
,
203 &ipoib_hdr
->pseudo
.peer
);
207 * Transcribe IPoIB address
209 * @v ll_addr Link-layer address
210 * @ret string Link-layer address in human-readable format
212 const char * ipoib_ntoa ( const void *ll_addr
) {
214 const struct ipoib_mac
*mac
= ll_addr
;
216 snprintf ( buf
, sizeof ( buf
), "%08lx:%08lx:%08lx:%08lx:%08lx",
217 htonl ( mac
->qpn
), htonl ( mac
->gid
.u
.dwords
[0] ),
218 htonl ( mac
->gid
.u
.dwords
[1] ),
219 htonl ( mac
->gid
.u
.dwords
[2] ),
220 htonl ( mac
->gid
.u
.dwords
[3] ) );
224 /** IPoIB protocol */
225 struct ll_protocol ipoib_protocol __ll_protocol
= {
227 .ll_proto
= htons ( ARPHRD_INFINIBAND
),
228 .ll_addr_len
= IPOIB_ALEN
,
229 .ll_header_len
= IPOIB_HLEN
,
230 .ll_broadcast
= ( uint8_t * ) &ipoib_broadcast
,
/****************************************************************************
 *
 * IPoIB network device
 *
 ****************************************************************************
 */
246 * @v ipoib IPoIB device
249 static void ipoib_destroy_qset ( struct ipoib_device
*ipoib
,
250 struct ipoib_queue_set
*qset
) {
251 struct ib_device
*ibdev
= ipoib
->ibdev
;
254 ib_destroy_qp ( ibdev
, qset
->qp
);
256 ib_destroy_cq ( ibdev
, qset
->cq
);
257 memset ( qset
, 0, sizeof ( *qset
) );
263 * @v ipoib IPoIB device
265 * @ret rc Return status code
267 static int ipoib_create_qset ( struct ipoib_device
*ipoib
,
268 struct ipoib_queue_set
*qset
,
269 unsigned int num_cqes
,
270 unsigned int num_send_wqes
,
271 unsigned int num_recv_wqes
,
272 unsigned long qkey
) {
273 struct ib_device
*ibdev
= ipoib
->ibdev
;
276 /* Store queue parameters */
277 qset
->recv_max_fill
= num_recv_wqes
;
279 /* Allocate completion queue */
280 qset
->cq
= ib_create_cq ( ibdev
, num_cqes
);
282 DBGC ( ipoib
, "IPoIB %p could not allocate completion queue\n",
288 /* Allocate queue pair */
289 qset
->qp
= ib_create_qp ( ibdev
, num_send_wqes
, qset
->cq
,
290 num_recv_wqes
, qset
->cq
, qkey
);
292 DBGC ( ipoib
, "IPoIB %p could not allocate queue pair\n",
297 qset
->qp
->owner_priv
= ipoib
->netdev
;
302 ipoib_destroy_qset ( ipoib
, qset
);
307 * Find path cache entry by GID
310 * @ret entry Path cache entry, or NULL
312 static struct ipoib_cached_path
*
313 ipoib_find_cached_path ( struct ib_gid
*gid
) {
314 struct ipoib_cached_path
*path
;
317 for ( i
= 0 ; i
< IPOIB_NUM_CACHED_PATHS
; i
++ ) {
318 path
= &ipoib_path_cache
[i
];
319 if ( memcmp ( &path
->gid
, gid
, sizeof ( *gid
) ) == 0 )
322 DBG ( "IPoIB %08lx:%08lx:%08lx:%08lx cache miss\n",
323 htonl ( gid
->u
.dwords
[0] ), htonl ( gid
->u
.dwords
[1] ),
324 htonl ( gid
->u
.dwords
[2] ), htonl ( gid
->u
.dwords
[3] ) );
329 * Transmit path record request
331 * @v ipoib IPoIB device
332 * @v gid Destination GID
333 * @ret rc Return status code
335 static int ipoib_get_path_record ( struct ipoib_device
*ipoib
,
336 struct ib_gid
*gid
) {
337 struct ib_device
*ibdev
= ipoib
->ibdev
;
338 struct io_buffer
*iobuf
;
339 struct ib_mad_path_record
*path_record
;
340 struct ib_address_vector av
;
343 /* Allocate I/O buffer */
344 iobuf
= alloc_iob ( sizeof ( *path_record
) );
347 iob_put ( iobuf
, sizeof ( *path_record
) );
348 path_record
= iobuf
->data
;
349 memset ( path_record
, 0, sizeof ( *path_record
) );
351 /* Construct path record request */
352 path_record
->mad_hdr
.base_version
= IB_MGMT_BASE_VERSION
;
353 path_record
->mad_hdr
.mgmt_class
= IB_MGMT_CLASS_SUBN_ADM
;
354 path_record
->mad_hdr
.class_version
= 2;
355 path_record
->mad_hdr
.method
= IB_MGMT_METHOD_GET
;
356 path_record
->mad_hdr
.attr_id
= htons ( IB_SA_ATTR_PATH_REC
);
357 path_record
->mad_hdr
.tid
[0] = IPOIB_TID_GET_PATH_REC
;
358 path_record
->mad_hdr
.tid
[1] = ipoib_meta_tid
++;
359 path_record
->sa_hdr
.comp_mask
[1] =
360 htonl ( IB_SA_PATH_REC_DGID
| IB_SA_PATH_REC_SGID
);
361 memcpy ( &path_record
->dgid
, gid
, sizeof ( path_record
->dgid
) );
362 memcpy ( &path_record
->sgid
, &ibdev
->port_gid
,
363 sizeof ( path_record
->sgid
) );
365 /* Construct address vector */
366 memset ( &av
, 0, sizeof ( av
) );
367 av
.dlid
= ibdev
->sm_lid
;
368 av
.dest_qp
= IB_SA_QPN
;
369 av
.qkey
= IB_GLOBAL_QKEY
;
371 /* Post send request */
372 if ( ( rc
= ib_post_send ( ibdev
, ipoib
->meta
.qp
, &av
,
374 DBGC ( ipoib
, "IPoIB %p could not send get path record: %s\n",
375 ipoib
, strerror ( rc
) );
384 * Transmit multicast group membership request
386 * @v ipoib IPoIB device
387 * @v gid Multicast GID
388 * @v join Join (rather than leave) group
389 * @ret rc Return status code
391 static int ipoib_mc_member_record ( struct ipoib_device
*ipoib
,
392 struct ib_gid
*gid
, int join
) {
393 struct ib_device
*ibdev
= ipoib
->ibdev
;
394 struct io_buffer
*iobuf
;
395 struct ib_mad_mc_member_record
*mc_member_record
;
396 struct ib_address_vector av
;
399 /* Allocate I/O buffer */
400 iobuf
= alloc_iob ( sizeof ( *mc_member_record
) );
403 iob_put ( iobuf
, sizeof ( *mc_member_record
) );
404 mc_member_record
= iobuf
->data
;
405 memset ( mc_member_record
, 0, sizeof ( *mc_member_record
) );
407 /* Construct path record request */
408 mc_member_record
->mad_hdr
.base_version
= IB_MGMT_BASE_VERSION
;
409 mc_member_record
->mad_hdr
.mgmt_class
= IB_MGMT_CLASS_SUBN_ADM
;
410 mc_member_record
->mad_hdr
.class_version
= 2;
411 mc_member_record
->mad_hdr
.method
=
412 ( join
? IB_MGMT_METHOD_SET
: IB_MGMT_METHOD_DELETE
);
413 mc_member_record
->mad_hdr
.attr_id
= htons ( IB_SA_ATTR_MC_MEMBER_REC
);
414 mc_member_record
->mad_hdr
.tid
[0] = IPOIB_TID_MC_MEMBER_REC
;
415 mc_member_record
->mad_hdr
.tid
[1] = ipoib_meta_tid
++;
416 mc_member_record
->sa_hdr
.comp_mask
[1] =
417 htonl ( IB_SA_MCMEMBER_REC_MGID
| IB_SA_MCMEMBER_REC_PORT_GID
|
418 IB_SA_MCMEMBER_REC_JOIN_STATE
);
419 mc_member_record
->scope__join_state
= 1;
420 memcpy ( &mc_member_record
->mgid
, gid
,
421 sizeof ( mc_member_record
->mgid
) );
422 memcpy ( &mc_member_record
->port_gid
, &ibdev
->port_gid
,
423 sizeof ( mc_member_record
->port_gid
) );
425 /* Construct address vector */
426 memset ( &av
, 0, sizeof ( av
) );
427 av
.dlid
= ibdev
->sm_lid
;
428 av
.dest_qp
= IB_SA_QPN
;
429 av
.qkey
= IB_GLOBAL_QKEY
;
431 /* Post send request */
432 if ( ( rc
= ib_post_send ( ibdev
, ipoib
->meta
.qp
, &av
,
434 DBGC ( ipoib
, "IPoIB %p could not send get path record: %s\n",
435 ipoib
, strerror ( rc
) );
444 * Transmit packet via IPoIB network device
446 * @v netdev Network device
447 * @v iobuf I/O buffer
448 * @ret rc Return status code
450 static int ipoib_transmit ( struct net_device
*netdev
,
451 struct io_buffer
*iobuf
) {
452 struct ipoib_device
*ipoib
= netdev
->priv
;
453 struct ib_device
*ibdev
= ipoib
->ibdev
;
454 struct ipoib_pseudo_hdr
*ipoib_pshdr
= iobuf
->data
;
455 struct ib_address_vector av
;
457 struct ipoib_cached_path
*path
;
461 if ( iob_len ( iobuf
) < sizeof ( *ipoib_pshdr
) ) {
462 DBGC ( ipoib
, "IPoIB %p buffer too short\n", ipoib
);
465 iob_pull ( iobuf
, ( sizeof ( *ipoib_pshdr
) ) );
467 /* Construct address vector */
468 memset ( &av
, 0, sizeof ( av
) );
469 av
.qkey
= IB_GLOBAL_QKEY
;
471 if ( ipoib_pshdr
->peer
.qpn
== htonl ( IPOIB_BROADCAST_QPN
) ) {
472 /* Broadcast address */
473 av
.dest_qp
= IB_BROADCAST_QPN
;
474 av
.dlid
= ipoib
->broadcast_lid
;
475 gid
= &ipoib
->broadcast_gid
;
477 /* Unicast - look in path cache */
478 path
= ipoib_find_cached_path ( &ipoib_pshdr
->peer
.gid
);
480 /* No path entry - get path record */
481 rc
= ipoib_get_path_record ( ipoib
,
482 &ipoib_pshdr
->peer
.gid
);
483 netdev_tx_complete ( netdev
, iobuf
);
486 av
.dest_qp
= ntohl ( ipoib_pshdr
->peer
.qpn
);
487 av
.dlid
= path
->dlid
;
488 av
.rate
= path
->rate
;
490 gid
= &ipoib_pshdr
->peer
.gid
;
492 memcpy ( &av
.gid
, gid
, sizeof ( av
.gid
) );
494 return ib_post_send ( ibdev
, ipoib
->data
.qp
, &av
, iobuf
);
498 * Handle IPoIB data send completion
500 * @v ibdev Infiniband device
502 * @v completion Completion
503 * @v iobuf I/O buffer
505 static void ipoib_data_complete_send ( struct ib_device
*ibdev __unused
,
506 struct ib_queue_pair
*qp
,
507 struct ib_completion
*completion
,
508 struct io_buffer
*iobuf
) {
509 struct net_device
*netdev
= qp
->owner_priv
;
511 netdev_tx_complete_err ( netdev
, iobuf
,
512 ( completion
->syndrome
? -EIO
: 0 ) );
516 * Handle IPoIB data receive completion
518 * @v ibdev Infiniband device
520 * @v completion Completion
521 * @v iobuf I/O buffer
523 static void ipoib_data_complete_recv ( struct ib_device
*ibdev __unused
,
524 struct ib_queue_pair
*qp
,
525 struct ib_completion
*completion
,
526 struct io_buffer
*iobuf
) {
527 struct net_device
*netdev
= qp
->owner_priv
;
528 struct ipoib_device
*ipoib
= netdev
->priv
;
529 struct ipoib_pseudo_hdr
*ipoib_pshdr
;
531 if ( completion
->syndrome
) {
532 netdev_rx_err ( netdev
, iobuf
, -EIO
);
536 iob_put ( iobuf
, completion
->len
);
537 if ( iob_len ( iobuf
) < sizeof ( struct ib_global_route_header
) ) {
538 DBGC ( ipoib
, "IPoIB %p received data packet too short to "
539 "contain GRH\n", ipoib
);
540 DBGC_HD ( ipoib
, iobuf
->data
, iob_len ( iobuf
) );
541 netdev_rx_err ( netdev
, iobuf
, -EIO
);
544 iob_pull ( iobuf
, sizeof ( struct ib_global_route_header
) );
546 if ( iob_len ( iobuf
) < sizeof ( struct ipoib_real_hdr
) ) {
547 DBGC ( ipoib
, "IPoIB %p received data packet too short to "
548 "contain IPoIB header\n", ipoib
);
549 DBGC_HD ( ipoib
, iobuf
->data
, iob_len ( iobuf
) );
550 netdev_rx_err ( netdev
, iobuf
, -EIO
);
554 ipoib_pshdr
= iob_push ( iobuf
, sizeof ( *ipoib_pshdr
) );
555 /* FIXME: fill in a MAC address for the sake of AoE! */
557 netdev_rx ( netdev
, iobuf
);
560 ipoib
->data
.recv_fill
--;
564 * Handle IPoIB metadata send completion
566 * @v ibdev Infiniband device
568 * @v completion Completion
569 * @v iobuf I/O buffer
571 static void ipoib_meta_complete_send ( struct ib_device
*ibdev __unused
,
572 struct ib_queue_pair
*qp
,
573 struct ib_completion
*completion
,
574 struct io_buffer
*iobuf
) {
575 struct net_device
*netdev
= qp
->owner_priv
;
576 struct ipoib_device
*ipoib
= netdev
->priv
;
578 if ( completion
->syndrome
) {
579 DBGC ( ipoib
, "IPoIB %p metadata TX completion error %x\n",
580 ipoib
, completion
->syndrome
);
586 * Handle received IPoIB path record
588 * @v ipoib IPoIB device
589 * @v path_record Path record
591 static void ipoib_recv_path_record ( struct ipoib_device
*ipoib __unused
,
592 struct ib_mad_path_record
*path_record
) {
593 struct ipoib_cached_path
*path
;
595 /* Update path cache entry */
596 path
= &ipoib_path_cache
[ipoib_path_cache_idx
];
597 memcpy ( &path
->gid
, &path_record
->dgid
, sizeof ( path
->gid
) );
598 path
->dlid
= ntohs ( path_record
->dlid
);
599 path
->sl
= ( path_record
->reserved__sl
& 0x0f );
600 path
->rate
= ( path_record
->rate_selector__rate
& 0x3f );
602 DBG ( "IPoIB %08lx:%08lx:%08lx:%08lx dlid %x sl %x rate %x\n",
603 htonl ( path
->gid
.u
.dwords
[0] ), htonl ( path
->gid
.u
.dwords
[1] ),
604 htonl ( path
->gid
.u
.dwords
[2] ), htonl ( path
->gid
.u
.dwords
[3] ),
605 path
->dlid
, path
->sl
, path
->rate
);
607 /* Update path cache index */
608 ipoib_path_cache_idx
++;
609 if ( ipoib_path_cache_idx
== IPOIB_NUM_CACHED_PATHS
)
610 ipoib_path_cache_idx
= 0;
614 * Handle received IPoIB multicast membership record
616 * @v ipoib IPoIB device
617 * @v mc_member_record Multicast membership record
619 static void ipoib_recv_mc_member_record ( struct ipoib_device
*ipoib
,
620 struct ib_mad_mc_member_record
*mc_member_record
) {
621 /* Record parameters */
622 ipoib
->broadcast_joined
=
623 ( mc_member_record
->scope__join_state
& 0x0f );
624 ipoib
->data_qkey
= ntohl ( mc_member_record
->qkey
);
625 ipoib
->broadcast_lid
= ntohs ( mc_member_record
->mlid
);
626 DBGC ( ipoib
, "IPoIB %p %s broadcast group: qkey %lx mlid %x\n",
627 ipoib
, ( ipoib
->broadcast_joined
? "joined" : "left" ),
628 ipoib
->data_qkey
, ipoib
->broadcast_lid
);
632 * Handle IPoIB metadata receive completion
634 * @v ibdev Infiniband device
636 * @v completion Completion
637 * @v iobuf I/O buffer
639 static void ipoib_meta_complete_recv ( struct ib_device
*ibdev __unused
,
640 struct ib_queue_pair
*qp
,
641 struct ib_completion
*completion
,
642 struct io_buffer
*iobuf
) {
643 struct net_device
*netdev
= qp
->owner_priv
;
644 struct ipoib_device
*ipoib
= netdev
->priv
;
647 if ( completion
->syndrome
) {
648 DBGC ( ipoib
, "IPoIB %p metadata RX completion error %x\n",
649 ipoib
, completion
->syndrome
);
653 iob_put ( iobuf
, completion
->len
);
654 if ( iob_len ( iobuf
) < sizeof ( struct ib_global_route_header
) ) {
655 DBGC ( ipoib
, "IPoIB %p received metadata packet too short "
656 "to contain GRH\n", ipoib
);
657 DBGC_HD ( ipoib
, iobuf
->data
, iob_len ( iobuf
) );
660 iob_pull ( iobuf
, sizeof ( struct ib_global_route_header
) );
661 if ( iob_len ( iobuf
) < sizeof ( *mad
) ) {
662 DBGC ( ipoib
, "IPoIB %p received metadata packet too short "
663 "to contain reply\n", ipoib
);
664 DBGC_HD ( ipoib
, iobuf
->data
, iob_len ( iobuf
) );
669 if ( mad
->mad_hdr
.status
!= 0 ) {
670 DBGC ( ipoib
, "IPoIB %p metadata RX err status %04x\n",
671 ipoib
, ntohs ( mad
->mad_hdr
.status
) );
675 switch ( mad
->mad_hdr
.tid
[0] ) {
676 case IPOIB_TID_GET_PATH_REC
:
677 ipoib_recv_path_record ( ipoib
, &mad
->path_record
);
679 case IPOIB_TID_MC_MEMBER_REC
:
680 ipoib_recv_mc_member_record ( ipoib
, &mad
->mc_member_record
);
683 DBGC ( ipoib
, "IPoIB %p unwanted response:\n",
685 DBGC_HD ( ipoib
, mad
, sizeof ( *mad
) );
690 ipoib
->meta
.recv_fill
--;
695 * Refill IPoIB receive ring
697 * @v ipoib IPoIB device
699 static void ipoib_refill_recv ( struct ipoib_device
*ipoib
,
700 struct ipoib_queue_set
*qset
) {
701 struct ib_device
*ibdev
= ipoib
->ibdev
;
702 struct io_buffer
*iobuf
;
705 while ( qset
->recv_fill
< qset
->recv_max_fill
) {
706 iobuf
= alloc_iob ( IPOIB_MTU
);
709 if ( ( rc
= ib_post_recv ( ibdev
, qset
->qp
, iobuf
) ) != 0 ) {
718 * Poll IPoIB network device
720 * @v netdev Network device
722 static void ipoib_poll ( struct net_device
*netdev
) {
723 struct ipoib_device
*ipoib
= netdev
->priv
;
724 struct ib_device
*ibdev
= ipoib
->ibdev
;
726 ib_poll_cq ( ibdev
, ipoib
->meta
.cq
, ipoib_meta_complete_send
,
727 ipoib_meta_complete_recv
);
728 ib_poll_cq ( ibdev
, ipoib
->data
.cq
, ipoib_data_complete_send
,
729 ipoib_data_complete_recv
);
730 ipoib_refill_recv ( ipoib
, &ipoib
->meta
);
731 ipoib_refill_recv ( ipoib
, &ipoib
->data
);
735 * Enable/disable interrupts on IPoIB network device
737 * @v netdev Network device
738 * @v enable Interrupts should be enabled
740 static void ipoib_irq ( struct net_device
*netdev __unused
,
741 int enable __unused
) {
742 /* No implementation */
746 * Open IPoIB network device
748 * @v netdev Network device
749 * @ret rc Return status code
751 static int ipoib_open ( struct net_device
*netdev
) {
752 struct ipoib_device
*ipoib
= netdev
->priv
;
753 struct ib_device
*ibdev
= ipoib
->ibdev
;
756 /* Attach to broadcast multicast GID */
757 if ( ( rc
= ib_mcast_attach ( ibdev
, ipoib
->data
.qp
,
758 &ipoib
->broadcast_gid
) ) != 0 ) {
759 DBG ( "Could not attach to broadcast GID: %s\n",
764 /* Fill receive rings */
765 ipoib_refill_recv ( ipoib
, &ipoib
->meta
);
766 ipoib_refill_recv ( ipoib
, &ipoib
->data
);
772 * Close IPoIB network device
774 * @v netdev Network device
776 static void ipoib_close ( struct net_device
*netdev
) {
777 struct ipoib_device
*ipoib
= netdev
->priv
;
778 struct ib_device
*ibdev
= ipoib
->ibdev
;
780 /* Detach from broadcast multicast GID */
781 ib_mcast_detach ( ibdev
, ipoib
->data
.qp
, &ipoib
->broadcast_gid
);
783 /* FIXME: should probably flush the receive ring */
786 /** IPoIB network device operations */
787 static struct net_device_operations ipoib_operations
= {
789 .close
= ipoib_close
,
790 .transmit
= ipoib_transmit
,
796 * Join IPoIB broadcast group
798 * @v ipoib IPoIB device
799 * @ret rc Return status code
801 static int ipoib_join_broadcast_group ( struct ipoib_device
*ipoib
) {
802 struct ib_device
*ibdev
= ipoib
->ibdev
;
803 unsigned int delay_ms
;
806 /* Make sure we have some receive descriptors */
807 ipoib_refill_recv ( ipoib
, &ipoib
->meta
);
809 /* Send join request */
810 if ( ( rc
= ipoib_mc_member_record ( ipoib
, &ipoib
->broadcast_gid
,
812 DBGC ( ipoib
, "IPoIB %p could not send broadcast join: %s\n",
813 ipoib
, strerror ( rc
) );
817 /* Wait for join to complete. Ideally we wouldn't delay for
818 * this long, but we need the queue key before we can set up
819 * the data queue pair, which we need before we can know the
822 for ( delay_ms
= IPOIB_JOIN_MAX_DELAY_MS
; delay_ms
; delay_ms
-- ) {
824 ib_poll_cq ( ibdev
, ipoib
->meta
.cq
, ipoib_meta_complete_send
,
825 ipoib_meta_complete_recv
);
826 ipoib_refill_recv ( ipoib
, &ipoib
->meta
);
827 if ( ipoib
->broadcast_joined
)
830 DBGC ( ipoib
, "IPoIB %p timed out waiting for broadcast join\n",
839 * @v ibdev Infiniband device
840 * @ret rc Return status code
842 int ipoib_probe ( struct ib_device
*ibdev
) {
843 struct net_device
*netdev
;
844 struct ipoib_device
*ipoib
;
845 struct ipoib_mac
*mac
;
848 /* Allocate network device */
849 netdev
= alloc_ipoibdev ( sizeof ( *ipoib
) );
852 netdev_init ( netdev
, &ipoib_operations
);
853 ipoib
= netdev
->priv
;
854 ib_set_ownerdata ( ibdev
, netdev
);
855 netdev
->dev
= ibdev
->dev
;
856 memset ( ipoib
, 0, sizeof ( *ipoib
) );
857 ipoib
->netdev
= netdev
;
858 ipoib
->ibdev
= ibdev
;
860 /* Calculate broadcast GID */
861 memcpy ( &ipoib
->broadcast_gid
, &ipv4_broadcast_gid
,
862 sizeof ( ipoib
->broadcast_gid
) );
863 ipoib
->broadcast_gid
.u
.words
[2] = htons ( ibdev
->pkey
);
865 /* Allocate metadata queue set */
866 if ( ( rc
= ipoib_create_qset ( ipoib
, &ipoib
->meta
,
868 IPOIB_META_NUM_SEND_WQES
,
869 IPOIB_META_NUM_RECV_WQES
,
870 IB_GLOBAL_QKEY
) ) != 0 ) {
871 DBGC ( ipoib
, "IPoIB %p could not allocate metadata QP: %s\n",
872 ipoib
, strerror ( rc
) );
873 goto err_create_meta_qset
;
876 /* Join broadcast group */
877 if ( ( rc
= ipoib_join_broadcast_group ( ipoib
) ) != 0 ) {
878 DBGC ( ipoib
, "IPoIB %p could not join broadcast group: %s\n",
879 ipoib
, strerror ( rc
) );
880 goto err_join_broadcast_group
;
883 /* Allocate data queue set */
884 if ( ( rc
= ipoib_create_qset ( ipoib
, &ipoib
->data
,
886 IPOIB_DATA_NUM_SEND_WQES
,
887 IPOIB_DATA_NUM_RECV_WQES
,
888 ipoib
->data_qkey
) ) != 0 ) {
889 DBGC ( ipoib
, "IPoIB %p could not allocate data QP: %s\n",
890 ipoib
, strerror ( rc
) );
891 goto err_create_data_qset
;
894 /* Construct MAC address */
895 mac
= ( ( struct ipoib_mac
* ) netdev
->ll_addr
);
896 mac
->qpn
= htonl ( ipoib
->data
.qp
->qpn
);
897 memcpy ( &mac
->gid
, &ibdev
->port_gid
, sizeof ( mac
->gid
) );
899 /* Register network device */
900 if ( ( rc
= register_netdev ( netdev
) ) != 0 )
901 goto err_register_netdev
;
906 ipoib_destroy_qset ( ipoib
, &ipoib
->data
);
907 err_join_broadcast_group
:
908 err_create_data_qset
:
909 ipoib_destroy_qset ( ipoib
, &ipoib
->meta
);
910 err_create_meta_qset
:
911 netdev_nullify ( netdev
);
912 netdev_put ( netdev
);
917 * Remove IPoIB device
919 * @v ibdev Infiniband device
921 void ipoib_remove ( struct ib_device
*ibdev
) {
922 struct net_device
*netdev
= ib_get_ownerdata ( ibdev
);
923 struct ipoib_device
*ipoib
= netdev
->priv
;
925 unregister_netdev ( netdev
);
926 ipoib_destroy_qset ( ipoib
, &ipoib
->data
);
927 ipoib_destroy_qset ( ipoib
, &ipoib
->meta
);
928 netdev_nullify ( netdev
);
929 netdev_put ( netdev
);